diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index 933130125f..6edcb670a4 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -1904,15 +1904,15 @@
   <name>fs.s3a.change.detection.mode</name>
   <value>server</value>
   <description>
-    Determines how change detection is applied to alert to S3 objects
-    rewritten while being read. Value 'server' indicates to apply the attribute
-    constraint directly on GetObject requests to S3. Value 'client' means to do a
-    client-side comparison of the attribute value returned in the response. Value
-    'server' would not work with third-party S3 implementations that do not
-    support these constraints on GetObject. Values 'server' and 'client' generate
-    RemoteObjectChangedException when a mismatch is detected. Value 'warn' works
-    like 'client' but generates only a warning. Value 'none' will ignore change
-    detection completely.
+    Determines how change detection is applied to alert to inconsistent S3
+    objects read during or after an overwrite. Value 'server' indicates to apply
+    the attribute constraint directly on GetObject requests to S3. Value 'client'
+    means to do a client-side comparison of the attribute value returned in the
+    response. Value 'server' would not work with third-party S3 implementations
+    that do not support these constraints on GetObject. Values 'server' and
+    'client' generate RemoteFileChangedException when a mismatch is detected.
+    Value 'warn' works like 'client' but generates only a warning. Value 'none'
+    will ignore change detection completely.
   </description>
 </property>
diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml
index 9419e48532..016ce40ccc 100644
--- a/hadoop-tools/hadoop-aws/pom.xml
+++ b/hadoop-tools/hadoop-aws/pom.xml
@@ -406,6 +406,11 @@
       <artifactId>hadoop-common</artifactId>
       <scope>provided</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.httpcomponents</groupId>
+      <artifactId>httpcore</artifactId>
+      <scope>provided</scope>
+    </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java
index 68a69f3932..a59ffa9c6e 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java
@@ -197,6 +197,33 @@ public void retry(String action,
         });
   }
 
+  /**
+   * Execute a void operation with retry processing when doRetry=true, else
+   * just once.
+   * @param doRetry true if retries should be performed
+   * @param action action to execute (used in error messages)
+   * @param path path of work (used in error messages)
+   * @param idempotent does the operation have semantics
+   * which mean that it can be retried even if it was already executed?
+   * @param retrying callback on retries
+   * @param operation operation to execute
+   * @throws IOException any IOE raised, or translated exception
+   */
+  @Retries.RetryTranslated
+  public void maybeRetry(boolean doRetry,
+      String action,
+      String path,
+      boolean idempotent,
+      Retried retrying,
+      VoidOperation operation)
+      throws IOException {
+    maybeRetry(doRetry, action, path, idempotent, retrying,
+        () -> {
+          operation.execute();
+          return null;
+        });
+  }
+
   /**
    * Execute a void operation with the default retry callback invoked.
   * @param action action to execute (used in error messages)
@@ -215,6 +242,28 @@ public void retry(String action,
     retry(action, path, idempotent, retryCallback, operation);
   }
 
+  /**
+   * Execute a void operation with the default retry callback invoked when
+   * doRetry=true, else just once.
+   * @param doRetry true if retries should be performed
+   * @param action action to execute (used in error messages)
+   * @param path path of work (used in error messages)
+   * @param idempotent does the operation have semantics
+   * which mean that it can be retried even if it was already executed?
+   * @param operation operation to execute
+   * @throws IOException any IOE raised, or translated exception
+   */
+  @Retries.RetryTranslated
+  public void maybeRetry(
+      boolean doRetry,
+      String action,
+      String path,
+      boolean idempotent,
+      VoidOperation operation)
+      throws IOException {
+    maybeRetry(doRetry, action, path, idempotent, retryCallback, operation);
+  }
+
   /**
    * Execute a function with the default retry callback invoked.
    * @param action action to execute (used in error messages)
@@ -265,6 +314,41 @@ public <T> T retry(
         () -> once(action, path, operation));
   }
 
+  /**
+   * Execute a function with retry processing when doRetry=true, else just
+   * once.
+   * Uses {@link #once(String, String, Operation)} as the inner
+   * invocation mechanism before retry logic is performed.
+   * @param <T> type of return value
+   * @param doRetry true if retries should be performed
+   * @param action action to execute (used in error messages)
+   * @param path path of work (used in error messages)
+   * @param idempotent does the operation have semantics
+   * which mean that it can be retried even if it was already executed?
+   * @param retrying callback on retries
+   * @param operation operation to execute
+   * @return the result of the call
+   * @throws IOException any IOE raised, or translated exception
+   */
+  @Retries.RetryTranslated
+  public <T> T maybeRetry(
+      boolean doRetry,
+      String action,
+      @Nullable String path,
+      boolean idempotent,
+      Retried retrying,
+      Operation<T> operation)
+      throws IOException {
+    if (doRetry) {
+      return retryUntranslated(
+          toDescription(action, path),
+          idempotent,
+          retrying,
+          () -> once(action, path, operation));
+    } else {
+      return once(action, path, operation);
+    }
+  }
+
   /**
    * Execute a function with retry processing and no translation.
    * and the default retry callback.
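Note: the conditional-retry entry points above are exercised later in this patch by S3AInputStream.lazySeek(), which only applies the retry policy on the very first open of a stream. A minimal sketch of that calling pattern (the fields and the guarded method are stand-ins for the stream's own members; only the maybeRetry contract comes from this patch):

    // Retry only on the first open of the stream; after that, a failure
    // usually means the object really changed, so retrying will not help.
    invoker.maybeRetry(streamStatistics.openOperations == 0,
        "lazySeek", pathStr, true,
        () -> {
          seekInStream(targetPos, len);  // stand-in for the guarded work
        });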
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
index b016eadcfb..0f8c52be18 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java
@@ -33,10 +33,11 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.HashSet;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.ListIterator;
+import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.Set;
 
@@ -68,7 +69,7 @@ public Listing(S3AFileSystem owner) {
    * @return the file status iterator
    */
   ProvidedFileStatusIterator createProvidedFileStatusIterator(
-      FileStatus[] fileStatuses,
+      S3AFileStatus[] fileStatuses,
       PathFilter filter,
       FileStatusAcceptor acceptor) {
     return new ProvidedFileStatusIterator(fileStatuses, filter, acceptor);
@@ -114,7 +115,7 @@ FileStatusListingIterator createFileStatusListingIterator(
       S3ListRequest request,
       PathFilter filter,
       Listing.FileStatusAcceptor acceptor,
-      RemoteIterator<FileStatus> providedStatus) throws IOException {
+      RemoteIterator<S3AFileStatus> providedStatus) throws IOException {
     return new FileStatusListingIterator(
         new ObjectListingIterator(listPath, request),
         filter,
@@ -129,7 +130,7 @@ FileStatusListingIterator createFileStatusListingIterator(
    */
   @VisibleForTesting
   LocatedFileStatusIterator createLocatedFileStatusIterator(
-      RemoteIterator<FileStatus> statusIterator) {
+      RemoteIterator<S3AFileStatus> statusIterator) {
     return new LocatedFileStatusIterator(statusIterator);
   }
 
@@ -143,7 +144,7 @@ LocatedFileStatusIterator createLocatedFileStatusIterator(
    */
   @VisibleForTesting
   TombstoneReconcilingIterator createTombstoneReconcilingIterator(
-      RemoteIterator<LocatedFileStatus> iterator, Set<Path> tombstones) {
+      RemoteIterator<S3ALocatedFileStatus> iterator, Set<Path> tombstones) {
     return new TombstoneReconcilingIterator(iterator, tombstones);
   }
 
@@ -189,19 +190,19 @@ interface FileStatusAcceptor {
    * iterator returned.
    */
   static final class SingleStatusRemoteIterator
-      implements RemoteIterator<LocatedFileStatus> {
+      implements RemoteIterator<S3ALocatedFileStatus> {
 
     /**
      * The status to return; set to null after the first iteration.
      */
-    private LocatedFileStatus status;
+    private S3ALocatedFileStatus status;
 
     /**
      * Constructor.
      * @param status status value: may be null, in which case
      * the iterator is empty.
      */
-    public SingleStatusRemoteIterator(LocatedFileStatus status) {
+    SingleStatusRemoteIterator(S3ALocatedFileStatus status) {
       this.status = status;
     }
 
@@ -226,9 +227,9 @@ public boolean hasNext() throws IOException {
      * to the constructor.
      */
     @Override
-    public LocatedFileStatus next() throws IOException {
+    public S3ALocatedFileStatus next() throws IOException {
       if (hasNext()) {
-        LocatedFileStatus s = this.status;
+        S3ALocatedFileStatus s = this.status;
         status = null;
         return s;
       } else {
@@ -247,16 +248,16 @@ public LocatedFileStatus next() throws IOException {
    * There is no remote data to fetch.
    */
   static class ProvidedFileStatusIterator
-      implements RemoteIterator<FileStatus> {
-    private final ArrayList<FileStatus> filteredStatusList;
+      implements RemoteIterator<S3AFileStatus> {
+    private final ArrayList<S3AFileStatus> filteredStatusList;
     private int index = 0;
 
-    ProvidedFileStatusIterator(FileStatus[] fileStatuses, PathFilter filter,
+    ProvidedFileStatusIterator(S3AFileStatus[] fileStatuses, PathFilter filter,
         FileStatusAcceptor acceptor) {
       Preconditions.checkArgument(fileStatuses != null, "Null status list!");
 
       filteredStatusList = new ArrayList<>(fileStatuses.length);
-      for (FileStatus status : fileStatuses) {
+      for (S3AFileStatus status : fileStatuses) {
         if (filter.accept(status.getPath()) && acceptor.accept(status)) {
           filteredStatusList.add(status);
         }
@@ -270,7 +271,7 @@ public boolean hasNext() throws IOException {
     }
 
     @Override
-    public FileStatus next() throws IOException {
+    public S3AFileStatus next() throws IOException {
       if (!hasNext()) {
         throw new NoSuchElementException();
       }
@@ -305,7 +306,7 @@ public FileStatus next() throws IOException {
    * Thread safety: None.
    */
   class FileStatusListingIterator
-      implements RemoteIterator<FileStatus> {
+      implements RemoteIterator<S3AFileStatus> {
 
     /** Source of objects. */
     private final ObjectListingIterator source;
@@ -316,10 +317,10 @@ class FileStatusListingIterator
     /** request batch size. */
     private int batchSize;
     /** Iterator over the current set of results. */
-    private ListIterator<FileStatus> statusBatchIterator;
+    private ListIterator<S3AFileStatus> statusBatchIterator;
 
-    private final Set<FileStatus> providedStatus;
-    private Iterator<FileStatus> providedStatusIterator;
+    private final Map<Path, S3AFileStatus> providedStatus;
+    private Iterator<S3AFileStatus> providedStatusIterator;
 
     /**
      * Create an iterator over file status entries.
@@ -335,15 +336,16 @@ class FileStatusListingIterator
     FileStatusListingIterator(ObjectListingIterator source,
         PathFilter filter,
         FileStatusAcceptor acceptor,
-        RemoteIterator<FileStatus> providedStatus) throws IOException {
+        RemoteIterator<S3AFileStatus> providedStatus) throws IOException {
       this.source = source;
       this.filter = filter;
       this.acceptor = acceptor;
-      this.providedStatus = new HashSet<>();
+      this.providedStatus = new HashMap<>();
       for (; providedStatus != null && providedStatus.hasNext();) {
-        final FileStatus status = providedStatus.next();
-        if (filter.accept(status.getPath()) && acceptor.accept(status)) {
-          this.providedStatus.add(status);
+        final S3AFileStatus status = providedStatus.next();
+        Path path = status.getPath();
+        if (filter.accept(path) && acceptor.accept(status)) {
+          this.providedStatus.put(path, status);
         }
       }
       // build the first set of results. This will not trigger any
@@ -376,7 +378,7 @@ private boolean sourceHasNext() throws IOException {
         // turn to file status that are only in provided list
         if (providedStatusIterator == null) {
           LOG.debug("Start iterating the provided status.");
-          providedStatusIterator = providedStatus.iterator();
+          providedStatusIterator = providedStatus.values().iterator();
         }
         return false;
       }
@@ -384,14 +386,21 @@ private boolean sourceHasNext() throws IOException {
 
     @Override
     @Retries.RetryTranslated
-    public FileStatus next() throws IOException {
-      final FileStatus status;
+    public S3AFileStatus next() throws IOException {
+      final S3AFileStatus status;
       if (sourceHasNext()) {
         status = statusBatchIterator.next();
-        // We remove from provided list the file status listed by S3 so that
+        // We remove from provided map the file status listed by S3 so that
         // this does not return duplicate items.
-        if (providedStatus.remove(status)) {
-          LOG.debug("Removed the status from provided file status {}", status);
+
+        // The provided status is returned as it is assumed to have the better
+        // metadata (i.e. the eTag and versionId from S3Guard)
+        S3AFileStatus provided = providedStatus.remove(status.getPath());
+        if (provided != null) {
+          LOG.debug(
+              "Removed and returned the status from provided file status {}",
+              status);
+          return provided;
         }
       } else {
         if (providedStatusIterator.hasNext()) {
@@ -441,7 +450,7 @@ private boolean buildNextStatusBatch(S3ListResult objects) {
       // counters for debug logs
       int added = 0, ignored = 0;
       // list to fill in with results. Initial size will be list maximum.
-      List<FileStatus> stats = new ArrayList<>(
+      List<S3AFileStatus> stats = new ArrayList<>(
           objects.getObjectSummaries().size() +
               objects.getCommonPrefixes().size());
       // objects
@@ -453,8 +462,9 @@ private boolean buildNextStatusBatch(S3ListResult objects) {
         }
         // Skip over keys that are ourselves and old S3N _$folder$ files
         if (acceptor.accept(keyPath, summary) && filter.accept(keyPath)) {
-          FileStatus status = createFileStatus(keyPath, summary,
-              owner.getDefaultBlockSize(keyPath), owner.getUsername());
+          S3AFileStatus status = createFileStatus(keyPath, summary,
+              owner.getDefaultBlockSize(keyPath), owner.getUsername(),
+              null, null);
           LOG.debug("Adding: {}", status);
           stats.add(status);
           added++;
@@ -468,7 +478,7 @@ private boolean buildNextStatusBatch(S3ListResult objects) {
       for (String prefix : objects.getCommonPrefixes()) {
         Path keyPath = owner.keyToQualifiedPath(prefix);
         if (acceptor.accept(keyPath, prefix) && filter.accept(keyPath)) {
-          FileStatus status = new S3AFileStatus(Tristate.FALSE, keyPath,
+          S3AFileStatus status = new S3AFileStatus(Tristate.FALSE, keyPath,
               owner.getUsername());
           LOG.debug("Adding directory: {}", status);
           added++;
@@ -679,14 +689,14 @@ public boolean accept(FileStatus status) {
    * return a remote iterator of {@link LocatedFileStatus} instances.
    */
   class LocatedFileStatusIterator
-      implements RemoteIterator<LocatedFileStatus> {
-    private final RemoteIterator<FileStatus> statusIterator;
+      implements RemoteIterator<S3ALocatedFileStatus> {
+    private final RemoteIterator<S3AFileStatus> statusIterator;
 
     /**
      * Constructor.
      * @param statusIterator an iterator over the remote status entries
      */
-    LocatedFileStatusIterator(RemoteIterator<FileStatus> statusIterator) {
+    LocatedFileStatusIterator(RemoteIterator<S3AFileStatus> statusIterator) {
       this.statusIterator = statusIterator;
     }
 
@@ -696,7 +706,7 @@ public boolean hasNext() throws IOException {
     }
 
     @Override
-    public LocatedFileStatus next() throws IOException {
+    public S3ALocatedFileStatus next() throws IOException {
       return owner.toLocatedFileStatus(statusIterator.next());
     }
   }
@@ -708,16 +718,16 @@ public LocatedFileStatus next() throws IOException {
    * remain in the source iterator.
    */
   static class TombstoneReconcilingIterator implements
-      RemoteIterator<LocatedFileStatus> {
-    private LocatedFileStatus next = null;
-    private final RemoteIterator<LocatedFileStatus> iterator;
+      RemoteIterator<S3ALocatedFileStatus> {
+    private S3ALocatedFileStatus next = null;
+    private final RemoteIterator<S3ALocatedFileStatus> iterator;
     private final Set<Path> tombstones;
 
     /**
      * @param iterator Source iterator to filter
      * @param tombstones set of tombstone markers to filter out of results
      */
-    TombstoneReconcilingIterator(RemoteIterator<LocatedFileStatus>
+    TombstoneReconcilingIterator(RemoteIterator<S3ALocatedFileStatus>
         iterator, Set<Path> tombstones) {
       this.iterator = iterator;
       if (tombstones != null) {
@@ -729,7 +739,7 @@ static class TombstoneReconcilingIterator implements
 
     private boolean fetch() throws IOException {
       while (next == null && iterator.hasNext()) {
-        LocatedFileStatus candidate = iterator.next();
+        S3ALocatedFileStatus candidate = iterator.next();
         if (!tombstones.contains(candidate.getPath())) {
           next = candidate;
           return true;
@@ -745,9 +755,9 @@ public boolean hasNext() throws IOException {
       return fetch();
     }
 
-    public LocatedFileStatus next() throws IOException {
+    public S3ALocatedFileStatus next() throws IOException {
       if (hasNext()) {
-        LocatedFileStatus result = next;
+        S3ALocatedFileStatus result = next;
         next = null;
         fetch();
         return result;
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/RemoteFileChangedException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/RemoteFileChangedException.java
index cfa5935bbf..1df2d7ee20 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/RemoteFileChangedException.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/RemoteFileChangedException.java
@@ -32,6 +32,9 @@
 @InterfaceStability.Unstable
 public class RemoteFileChangedException extends PathIOException {
 
+  public static final String PRECONDITIONS_FAILED =
+      "Constraints of request were unsatisfiable";
+
   /**
    * Constructs a RemoteFileChangedException.
    *
@@ -46,4 +49,21 @@ public RemoteFileChangedException(String path,
     super(path, message);
     setOperation(operation);
   }
+
+  /**
+   * Constructs a RemoteFileChangedException.
+   *
+   * @param path the path accessed when the change was detected
+   * @param operation the operation (e.g. open, re-open) performed when the
+   * change was detected
+   * @param message a message providing more details about the condition
+   * @param cause inner cause.
+   */
+  public RemoteFileChangedException(String path,
+      String operation,
+      String message,
+      Throwable cause) {
+    super(path, message, cause);
+    setOperation(operation);
+  }
 }
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java
index ca6ca908be..e8ff846f20 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileStatus.java
@@ -32,6 +32,8 @@
 @InterfaceStability.Evolving
 public class S3AFileStatus extends FileStatus {
   private Tristate isEmptyDirectory;
+  private String eTag;
+  private String versionId;
 
   /**
    * Create a directory status.
@@ -69,15 +71,17 @@ public S3AFileStatus(Tristate isemptydir,
    * @param path path
    * @param blockSize block size
    * @param owner owner
+   * @param eTag eTag of the S3 object if available, else null
+   * @param versionId versionId of the S3 object if available, else null
    */
   public S3AFileStatus(long length, long modification_time, Path path,
-      long blockSize, String owner) {
-    super(length, false, 1, blockSize, modification_time, 0,
-        null, null, null, null,
+      long blockSize, String owner, String eTag, String versionId) {
+    super(length, false, 1, blockSize, modification_time,
+        0, null, owner, owner, null,
         path, false, true, false);
     isEmptyDirectory = Tristate.FALSE;
-    setOwner(owner);
-    setGroup(owner);
+    this.eTag = eTag;
+    this.versionId = versionId;
   }
 
   /**
@@ -86,16 +90,19 @@ public S3AFileStatus(long length, long modification_time, Path path,
    * @param source FileStatus to convert to S3AFileStatus
    * @param isEmptyDirectory TRUE/FALSE if known to be / not be an empty
    * directory, UNKNOWN if that information was not computed.
+   * @param eTag eTag of the S3 object if available, else null
+   * @param versionId versionId of the S3 object if available, else null
    * @return a new S3AFileStatus
    */
   public static S3AFileStatus fromFileStatus(FileStatus source,
-      Tristate isEmptyDirectory) {
+      Tristate isEmptyDirectory, String eTag, String versionId) {
     if (source.isDirectory()) {
       return new S3AFileStatus(isEmptyDirectory, source.getPath(),
           source.getOwner());
     } else {
       return new S3AFileStatus(source.getLen(), source.getModificationTime(),
-          source.getPath(), source.getBlockSize(), source.getOwner());
+          source.getPath(), source.getBlockSize(), source.getOwner(),
+          eTag, versionId);
     }
   }
 
@@ -109,6 +116,28 @@ public Tristate isEmptyDirectory() {
     return isEmptyDirectory;
   }
 
+  /**
+   * Update isEmptyDirectory attribute.
+   * @param isEmptyDirectory new isEmptyDirectory value
+   */
+  public void setIsEmptyDirectory(Tristate isEmptyDirectory) {
+    this.isEmptyDirectory = isEmptyDirectory;
+  }
+
+  /**
+   * @return the S3 object eTag when available, else null.
+   */
+  public String getETag() {
+    return eTag;
+  }
+
+  /**
+   * @return the S3 object versionId when available, else null.
+   */
+  public String getVersionId() {
+    return versionId;
+  }
+
   /** Compare if this object is equal to another object.
    * @param o the object to be compared.
    * @return true if two file status has the same path name; false if not.
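For context, the two new attributes are what let S3Guard pin the exact S3 object a listing or HEAD observed. A small illustrative sketch of constructing and reading back such a status (all argument values here are made up, not taken from the patch):

    import org.apache.hadoop.fs.Path;

    // Both eTag and versionId may be null, e.g. on third-party stores or
    // unversioned buckets; callers must handle that.
    S3AFileStatus status = new S3AFileStatus(
        1024L,                                 // length
        System.currentTimeMillis(),            // modification time
        new Path("s3a://bucket/data/file.csv"),
        32 * 1024 * 1024,                      // block size to report
        "alice",                               // owner
        "11fae99a2fe08d05...",                 // illustrative eTag
        "3HL4kqtJlcpXrof3...");                // illustrative versionId
    String eTag = status.getETag();            // pins later GetObject requests
    String versionId = status.getVersionId();  // used when source=versionid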
@@ -150,8 +179,10 @@ public long getModificationTime(){
   @Override
   public String toString() {
-    return super.toString() +
-        String.format(" isEmptyDirectory=%s", isEmptyDirectory().name());
+    return super.toString()
+        + String.format(" isEmptyDirectory=%s", isEmptyDirectory().name())
+        + String.format(" eTag=%s", eTag)
+        + String.format(" versionId=%s", versionId);
   }
 
 }
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 7c82aa6b90..e6850e9e7c 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -49,6 +49,7 @@
 import com.amazonaws.AmazonClientException;
 import com.amazonaws.AmazonServiceException;
+import com.amazonaws.SdkBaseException;
 import com.amazonaws.services.s3.AmazonS3;
 import com.amazonaws.services.s3.model.AbortMultipartUploadRequest;
 import com.amazonaws.services.s3.model.CannedAccessControlList;
@@ -74,6 +75,7 @@
 import com.amazonaws.services.s3.transfer.TransferManager;
 import com.amazonaws.services.s3.transfer.TransferManagerConfiguration;
 import com.amazonaws.services.s3.transfer.Upload;
+import com.amazonaws.services.s3.transfer.model.CopyResult;
 import com.amazonaws.services.s3.transfer.model.UploadResult;
 import com.amazonaws.event.ProgressListener;
 import com.google.common.annotations.VisibleForTesting;
@@ -89,6 +91,7 @@
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy;
+import org.apache.hadoop.fs.s3a.impl.CopyOutcome;
 import org.apache.hadoop.fs.s3a.select.InternalSelectConstants;
 import org.apache.hadoop.util.LambdaUtils;
 import org.apache.hadoop.fs.FileAlreadyExistsException;
@@ -115,6 +118,7 @@
 import org.apache.hadoop.fs.s3a.commit.CommitConstants;
 import org.apache.hadoop.fs.s3a.commit.PutTracker;
 import org.apache.hadoop.fs.s3a.commit.MagicCommitIntegration;
+import org.apache.hadoop.fs.s3a.impl.ChangeTracker;
 import org.apache.hadoop.fs.s3a.select.SelectBinding;
 import org.apache.hadoop.fs.s3a.select.SelectConstants;
 import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
@@ -324,26 +328,7 @@ public void initialize(URI name, Configuration originalConf)
     readAhead = longBytesOption(conf, READAHEAD_RANGE,
         DEFAULT_READAHEAD_RANGE, 0);
 
-    int maxThreads = conf.getInt(MAX_THREADS, DEFAULT_MAX_THREADS);
-    if (maxThreads < 2) {
-      LOG.warn(MAX_THREADS + " must be at least 2: forcing to 2.");
-      maxThreads = 2;
-    }
-    int totalTasks = intOption(conf,
-        MAX_TOTAL_TASKS, DEFAULT_MAX_TOTAL_TASKS, 1);
-    long keepAliveTime = longOption(conf, KEEPALIVE_TIME,
-        DEFAULT_KEEPALIVE_TIME, 0);
-    boundedThreadPool = BlockingThreadPoolExecutorService.newInstance(
-        maxThreads,
-        maxThreads + totalTasks,
-        keepAliveTime, TimeUnit.SECONDS,
-        "s3a-transfer-shared");
-    unboundedThreadPool = new ThreadPoolExecutor(
-        maxThreads, Integer.MAX_VALUE,
-        keepAliveTime, TimeUnit.SECONDS,
-        new LinkedBlockingQueue<>(),
-        BlockingThreadPoolExecutorService.newDaemonThreadFactory(
-            "s3a-transfer-unbounded"));
+    initThreadPools(conf);
 
     int listVersion = conf.getInt(LIST_VERSION, DEFAULT_LIST_VERSION);
     if (listVersion < 1 || listVersion > 2) {
@@ -412,6 +397,29 @@ public void initialize(URI name, Configuration originalConf)
 
   }
 
+  private void initThreadPools(Configuration conf) {
+    int maxThreads = conf.getInt(MAX_THREADS,
DEFAULT_MAX_THREADS); + if (maxThreads < 2) { + LOG.warn(MAX_THREADS + " must be at least 2: forcing to 2."); + maxThreads = 2; + } + int totalTasks = intOption(conf, + MAX_TOTAL_TASKS, DEFAULT_MAX_TOTAL_TASKS, 1); + long keepAliveTime = longOption(conf, KEEPALIVE_TIME, + DEFAULT_KEEPALIVE_TIME, 0); + boundedThreadPool = BlockingThreadPoolExecutorService.newInstance( + maxThreads, + maxThreads + totalTasks, + keepAliveTime, TimeUnit.SECONDS, + "s3a-transfer-shared"); + unboundedThreadPool = new ThreadPoolExecutor( + maxThreads, Integer.MAX_VALUE, + keepAliveTime, TimeUnit.SECONDS, + new LinkedBlockingQueue(), + BlockingThreadPoolExecutorService.newDaemonThreadFactory( + "s3a-transfer-unbounded")); + } + /** * Create the storage statistics or bind to an existing one. * @return a storage statistics instance. @@ -652,6 +660,13 @@ protected void setAmazonS3Client(AmazonS3 client) { Preconditions.checkNotNull(client, "client"); LOG.debug("Setting S3 client to {}", client); s3 = client; + + // Need to use a new TransferManager that uses the new client. + // Also, using a new TransferManager requires a new threadpool as the old + // TransferManager will shut the thread pool down when it is garbage + // collected. + initThreadPools(getConf()); + initTransferManager(); } /** @@ -697,10 +712,11 @@ public S3AInputPolicy getInputPolicy() { /** * Get the change detection policy for this FS instance. + * Only public to allow access in tests in other packages. * @return the change detection policy */ @VisibleForTesting - ChangeDetectionPolicy getChangeDetectionPolicy() { + public ChangeDetectionPolicy getChangeDetectionPolicy() { return changeDetectionPolicy; } @@ -877,7 +893,7 @@ private FSDataInputStream open( throws IOException { entryPoint(INVOCATION_OPEN); - final FileStatus fileStatus = getFileStatus(path); + final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path); if (fileStatus.isDirectory()) { throw new FileNotFoundException("Can't open " + path + " because it is a directory"); @@ -910,7 +926,10 @@ private FSDataInputStream open( return new FSDataInputStream( new S3AInputStream( readContext, - createObjectAttributes(path), + createObjectAttributes( + path, + fileStatus.getETag(), + fileStatus.getVersionId()), fileStatus.getLen(), s3)); } @@ -943,13 +962,20 @@ private S3AReadOpContext createReadContext( /** * Create the attributes of an object for a get/select request. * @param f path path of the request. + * @param eTag the eTag of the S3 object + * @param versionId S3 object version ID * @return attributes to use when building the query. 
*/ - private S3ObjectAttributes createObjectAttributes(final Path f) { + private S3ObjectAttributes createObjectAttributes( + final Path f, + final String eTag, + final String versionId) { return new S3ObjectAttributes(bucket, pathToKey(f), getServerSideEncryptionAlgorithm(), - encryptionSecrets.getEncryptionKey()); + encryptionSecrets.getEncryptionKey(), + eTag, + versionId); } /** @@ -1213,19 +1239,27 @@ private boolean innerRename(Path source, Path dest) if (srcStatus.isFile()) { LOG.debug("rename: renaming file {} to {}", src, dst); long length = srcStatus.getLen(); + S3ObjectAttributes objectAttributes = + createObjectAttributes(srcStatus.getPath(), + srcStatus.getETag(), srcStatus.getVersionId()); + S3AReadOpContext readContext = createReadContext(srcStatus, inputPolicy, + changeDetectionPolicy, readAhead); if (dstStatus != null && dstStatus.isDirectory()) { String newDstKey = maybeAddTrailingSlash(dstKey); String filename = srcKey.substring(pathToKey(src.getParent()).length()+1); newDstKey = newDstKey + filename; - copyFile(srcKey, newDstKey, length); + CopyResult copyResult = copyFile(srcKey, newDstKey, length, + objectAttributes, readContext); S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, src, keyToQualifiedPath(newDstKey), length, getDefaultBlockSize(dst), - username); + username, copyResult.getETag(), copyResult.getVersionId()); } else { - copyFile(srcKey, dstKey, srcStatus.getLen()); + CopyResult copyResult = copyFile(srcKey, dstKey, srcStatus.getLen(), + objectAttributes, readContext); S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, src, dst, - length, getDefaultBlockSize(dst), username); + length, getDefaultBlockSize(dst), username, + copyResult.getETag(), copyResult.getVersionId()); } innerDelete(srcStatus, false); } else { @@ -1248,10 +1282,10 @@ private boolean innerRename(Path source, Path dest) } Path parentPath = keyToQualifiedPath(srcKey); - RemoteIterator iterator = listFilesAndEmptyDirectories( - parentPath, true); + RemoteIterator iterator = + listFilesAndEmptyDirectories(parentPath, true); while (iterator.hasNext()) { - LocatedFileStatus status = iterator.next(); + S3ALocatedFileStatus status = iterator.next(); long length = status.getLen(); String key = pathToKey(status.getPath()); if (status.isDirectory() && !key.endsWith("/")) { @@ -1261,7 +1295,13 @@ private boolean innerRename(Path source, Path dest) .add(new DeleteObjectsRequest.KeyVersion(key)); String newDstKey = dstKey + key.substring(srcKey.length()); - copyFile(key, newDstKey, length); + S3ObjectAttributes objectAttributes = + createObjectAttributes(status.getPath(), + status.getETag(), status.getVersionId()); + S3AReadOpContext readContext = createReadContext(status, inputPolicy, + changeDetectionPolicy, readAhead); + CopyResult copyResult = copyFile(key, newDstKey, length, + objectAttributes, readContext); if (hasMetadataStore()) { // with a metadata store, the object entries need to be updated, @@ -1273,7 +1313,8 @@ private boolean innerRename(Path source, Path dest) childDst, username); } else { S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, childSrc, - childDst, length, getDefaultBlockSize(childDst), username); + childDst, length, getDefaultBlockSize(childDst), username, + copyResult.getETag(), copyResult.getVersionId()); } // Ancestor directories may not be listed, so we explicitly add them S3Guard.addMoveAncestors(metadataStore, srcPaths, dstMetas, @@ -1319,10 +1360,28 @@ private boolean innerRename(Path source, Path dest) @VisibleForTesting @Retries.RetryTranslated public 
ObjectMetadata getObjectMetadata(Path path) throws IOException { + return getObjectMetadata(path, null, invoker, null); + } + + /** + * Low-level call to get at the object metadata. + * @param path path to the object + * @param changeTracker the change tracker to detect version inconsistencies + * @param changeInvoker the invoker providing the retry policy + * @param operation the operation being performed (e.g. "read" or "copy") + * @return metadata + * @throws IOException IO and object access problems. + */ + @VisibleForTesting + @Retries.RetryTranslated + public ObjectMetadata getObjectMetadata(Path path, + ChangeTracker changeTracker, Invoker changeInvoker, String operation) + throws IOException { return once("getObjectMetadata", path.toString(), () -> - // this always does a full HEAD to the object - getObjectMetadata(pathToKey(path))); + // this always does a full HEAD to the object + getObjectMetadata( + pathToKey(path), changeTracker, changeInvoker, operation)); } /** @@ -1493,14 +1552,41 @@ public S3AStorageStatistics getStorageStatistics() { */ @Retries.RetryRaw protected ObjectMetadata getObjectMetadata(String key) throws IOException { + return getObjectMetadata(key, null, invoker,null); + } + + /** + * Request object metadata; increments counters in the process. + * Retry policy: retry untranslated. + * Uses changeTracker to detect an unexpected file version (eTag or versionId) + * @param key key + * @param changeTracker the change tracker to detect unexpected object version + * @param changeInvoker the invoker providing the retry policy + * @param operation the operation (e.g. "read" or "copy") triggering this call + * @return the metadata + * @throws IOException if the retry invocation raises one (it shouldn't). + * @throws RemoteFileChangedException if an unexpected version is detected + */ + @Retries.RetryRaw + protected ObjectMetadata getObjectMetadata(String key, + ChangeTracker changeTracker, + Invoker changeInvoker, + String operation) throws IOException { GetObjectMetadataRequest request = new GetObjectMetadataRequest(bucket, key); //SSE-C requires to be filled in if enabled for object metadata generateSSECustomerKey().ifPresent(request::setSSECustomerKey); - ObjectMetadata meta = invoker.retryUntranslated("GET " + key, true, + ObjectMetadata meta = changeInvoker.retryUntranslated("GET " + key, true, () -> { incrementStatistic(OBJECT_METADATA_REQUESTS); - return s3.getObjectMetadata(request); + if (changeTracker != null) { + changeTracker.maybeApplyConstraint(request); + } + ObjectMetadata objectMetadata = s3.getObjectMetadata(request); + if (changeTracker != null) { + changeTracker.processMetadata(objectMetadata, operation); + } + return objectMetadata; }); incrementReadOperations(); return meta; @@ -1802,7 +1888,8 @@ PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest) PutObjectResult result = s3.putObject(putObjectRequest); incrementPutCompletedStatistics(true, len); // update metadata - finishedWrite(putObjectRequest.getKey(), len); + finishedWrite(putObjectRequest.getKey(), len, + result.getETag(), result.getVersionId()); return result; } catch (AmazonClientException e) { incrementPutCompletedStatistics(false, len); @@ -2160,7 +2247,7 @@ public FileStatus[] innerListStatus(Path f) throws FileNotFoundException, LOG.debug("List status for path: {}", path); entryPoint(INVOCATION_LIST_STATUS); - List result; + List result; final FileStatus fileStatus = getFileStatus(path); if (fileStatus.isDirectory()) { @@ -2419,11 +2506,11 @@ S3AFileStatus 
innerGetFileStatus(final Path f, } } - FileStatus msStatus = pm.getFileStatus(); + S3AFileStatus msStatus = pm.getFileStatus(); if (needEmptyDirectoryFlag && msStatus.isDirectory()) { if (pm.isEmptyDirectory() != Tristate.UNKNOWN) { // We have a definitive true / false from MetadataStore, we are done. - return S3AFileStatus.fromFileStatus(msStatus, pm.isEmptyDirectory()); + return msStatus; } else { DirListingMetadata children = S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider); @@ -2434,7 +2521,7 @@ S3AFileStatus innerGetFileStatus(final Path f, } } else { // Either this is not a directory, or we don't care if it is empty - return S3AFileStatus.fromFileStatus(msStatus, pm.isEmptyDirectory()); + return msStatus; } // If the metadata store has no children for it and it's not listed in @@ -2443,7 +2530,8 @@ S3AFileStatus innerGetFileStatus(final Path f, try { s3FileStatus = s3GetFileStatus(path, key, tombstones); } catch (FileNotFoundException e) { - return S3AFileStatus.fromFileStatus(msStatus, Tristate.TRUE); + return S3AFileStatus.fromFileStatus(msStatus, Tristate.TRUE, + null, null); } // entry was found, save in S3Guard return S3Guard.putAndReturn(metadataStore, s3FileStatus, instrumentation); @@ -2482,7 +2570,9 @@ private S3AFileStatus s3GetFileStatus(final Path path, String key, dateToLong(meta.getLastModified()), path, getDefaultBlockSize(path), - username); + username, + meta.getETag(), + meta.getVersionId()); } } catch (AmazonServiceException e) { if (e.getStatusCode() != 404) { @@ -2509,7 +2599,9 @@ private S3AFileStatus s3GetFileStatus(final Path path, String key, dateToLong(meta.getLastModified()), path, getDefaultBlockSize(path), - username); + username, + meta.getETag(), + meta.getVersionId()); } } catch (AmazonServiceException e) { if (e.getStatusCode() != 404) { @@ -2734,7 +2826,8 @@ UploadResult executePut(PutObjectRequest putObjectRequest, UploadResult result = waitForUploadCompletion(key, info); listener.uploadCompleted(); // post-write actions - finishedWrite(key, info.getLength()); + finishedWrite(key, info.getLength(), + result.getETag(), result.getVersionId()); return result; } @@ -2899,12 +2992,15 @@ public List listAWSPolicyRules( * @param srcKey source object path * @param dstKey destination object path * @param size object size - * @throws AmazonClientException on failures inside the AWS SDK + * @param srcAttributes S3 attributes of the source object + * @param readContext the read context + * @return the result of the copy * @throws InterruptedIOException the operation was interrupted * @throws IOException Other IO problems */ - @Retries.RetryMixed - private void copyFile(String srcKey, String dstKey, long size) + @Retries.RetryTranslated + private CopyResult copyFile(String srcKey, String dstKey, long size, + S3ObjectAttributes srcAttributes, S3AReadOpContext readContext) throws IOException, InterruptedIOException { LOG.debug("copyFile {} -> {} ", srcKey, dstKey); @@ -2918,26 +3014,58 @@ private void copyFile(String srcKey, String dstKey, long size) } }; - once("copyFile(" + srcKey + ", " + dstKey + ")", srcKey, + ChangeTracker changeTracker = new ChangeTracker( + keyToQualifiedPath(srcKey).toString(), + changeDetectionPolicy, + readContext.instrumentation.newInputStreamStatistics() + .getVersionMismatchCounter(), + srcAttributes); + + String action = "copyFile(" + srcKey + ", " + dstKey + ")"; + Invoker readInvoker = readContext.getReadInvoker(); + + ObjectMetadata srcom = + once(action, srcKey, + () -> + getObjectMetadata(srcKey, changeTracker, 
readInvoker, "copy")); + ObjectMetadata dstom = cloneObjectMetadata(srcom); + setOptionalObjectMetadata(dstom); + + return readInvoker.retry( + action, srcKey, + true, () -> { - ObjectMetadata srcom = getObjectMetadata(srcKey); - ObjectMetadata dstom = cloneObjectMetadata(srcom); - setOptionalObjectMetadata(dstom); CopyObjectRequest copyObjectRequest = new CopyObjectRequest(bucket, srcKey, bucket, dstKey); + changeTracker.maybeApplyConstraint(copyObjectRequest); + setOptionalCopyObjectRequestParameters(copyObjectRequest); copyObjectRequest.setCannedAccessControlList(cannedACL); copyObjectRequest.setNewObjectMetadata(dstom); + Optional.ofNullable(srcom.getStorageClass()) + .ifPresent(copyObjectRequest::setStorageClass); Copy copy = transfers.copy(copyObjectRequest); copy.addProgressListener(progressListener); - try { - copy.waitForCopyResult(); - incrementWriteOperations(); - instrumentation.filesCopied(1, size); - } catch (InterruptedException e) { - throw new InterruptedIOException("Interrupted copying " + srcKey - + " to " + dstKey + ", cancelling"); + CopyOutcome copyOutcome = CopyOutcome.waitForCopy(copy); + InterruptedException interruptedException = + copyOutcome.getInterruptedException(); + if (interruptedException != null) { + // copy interrupted: convert to an IOException. + throw (IOException)new InterruptedIOException( + "Interrupted copying " + srcKey + + " to " + dstKey + ", cancelling") + .initCause(interruptedException); } + SdkBaseException awsException = copyOutcome.getAwsException(); + if (awsException != null) { + changeTracker.processException(awsException, "copy"); + throw awsException; + } + CopyResult result = copyOutcome.getCopyResult(); + changeTracker.processResponse(result); + incrementWriteOperations(); + instrumentation.filesCopied(1, size); + return result; }); } @@ -3044,6 +3172,8 @@ private Optional generateSSECustomerKey() { * * @param key key written to * @param length total length of file written + * @param eTag eTag of the written object + * @param versionId S3 object versionId of the written object * @throws MetadataPersistenceException if metadata about the write could * not be saved to the metadata store and * fs.s3a.metadatastore.fail.on.write.error=true @@ -3051,7 +3181,7 @@ private Optional generateSSECustomerKey() { @InterfaceAudience.Private @Retries.RetryTranslated("Except if failOnMetadataWriteError=false, in which" + " case RetryExceptionsSwallowed") - void finishedWrite(String key, long length) + void finishedWrite(String key, long length, String eTag, String versionId) throws MetadataPersistenceException { LOG.debug("Finished write to {}, len {}", key, length); Path p = keyToQualifiedPath(key); @@ -3064,7 +3194,7 @@ void finishedWrite(String key, long length) S3Guard.addAncestors(metadataStore, p, username); S3AFileStatus status = createUploadFileStatus(p, S3AUtils.objectRepresentsDirectory(key, length), length, - getDefaultBlockSize(p), username); + getDefaultBlockSize(p), username, eTag, versionId); S3Guard.putAndReturn(metadataStore, status, instrumentation); } } catch (IOException e) { @@ -3436,26 +3566,41 @@ public EtagChecksum getFileChecksum(Path f, final long length) @Retries.OnceTranslated public RemoteIterator listFiles(Path f, boolean recursive) throws FileNotFoundException, IOException { - return innerListFiles(f, recursive, - new Listing.AcceptFilesOnly(qualify(f))); + return toLocatedFileStatusIterator(innerListFiles(f, recursive, + new Listing.AcceptFilesOnly(qualify(f)))); + } + + private static RemoteIterator 
toLocatedFileStatusIterator( + RemoteIterator iterator) { + return new RemoteIterator() { + @Override + public boolean hasNext() throws IOException { + return iterator.hasNext(); + } + + @Override + public LocatedFileStatus next() throws IOException { + return iterator.next(); + } + }; } @Retries.OnceTranslated - public RemoteIterator listFilesAndEmptyDirectories(Path f, - boolean recursive) throws IOException { + public RemoteIterator listFilesAndEmptyDirectories( + Path f, boolean recursive) throws IOException { return innerListFiles(f, recursive, new Listing.AcceptAllButS3nDirs()); } @Retries.OnceTranslated - private RemoteIterator innerListFiles(Path f, boolean + private RemoteIterator innerListFiles(Path f, boolean recursive, Listing.FileStatusAcceptor acceptor) throws IOException { entryPoint(INVOCATION_LIST_FILES); Path path = qualify(f); LOG.debug("listFiles({}, {})", path, recursive); try { // lookup dir triggers existence check - final FileStatus fileStatus = getFileStatus(path); + final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path); if (fileStatus.isFile()) { // simple case: File LOG.debug("Path is a file"); @@ -3467,7 +3612,7 @@ private RemoteIterator innerListFiles(Path f, boolean String delimiter = recursive ? null : "/"; LOG.debug("Requesting all entries under {} with delimiter '{}'", key, delimiter); - final RemoteIterator cachedFilesIterator; + final RemoteIterator cachedFilesIterator; final Set tombstones; if (recursive) { final PathMetadata pm = metadataStore.get(path, true); @@ -3539,52 +3684,55 @@ public RemoteIterator listLocatedStatus(final Path f, entryPoint(INVOCATION_LIST_LOCATED_STATUS); Path path = qualify(f); LOG.debug("listLocatedStatus({}, {}", path, filter); - return once("listLocatedStatus", path.toString(), - () -> { - // lookup dir triggers existence check - final FileStatus fileStatus = getFileStatus(path); - if (fileStatus.isFile()) { - // simple case: File - LOG.debug("Path is a file"); - return new Listing.SingleStatusRemoteIterator( - filter.accept(path) ? toLocatedFileStatus(fileStatus) : null); - } else { - // directory: trigger a lookup - final String key = maybeAddTrailingSlash(pathToKey(path)); - final Listing.FileStatusAcceptor acceptor = - new Listing.AcceptAllButSelfAndS3nDirs(path); - DirListingMetadata meta = - S3Guard.listChildrenWithTtl(metadataStore, path, - ttlTimeProvider); - final RemoteIterator cachedFileStatusIterator = - listing.createProvidedFileStatusIterator( - S3Guard.dirMetaToStatuses(meta), filter, acceptor); - return (allowAuthoritative && meta != null - && meta.isAuthoritative()) - ? listing.createLocatedFileStatusIterator( - cachedFileStatusIterator) - : listing.createLocatedFileStatusIterator( - listing.createFileStatusListingIterator(path, - createListObjectsRequest(key, "/"), - filter, - acceptor, - cachedFileStatusIterator)); - } - }); + RemoteIterator iterator = + once("listLocatedStatus", path.toString(), + () -> { + // lookup dir triggers existence check + final S3AFileStatus fileStatus = + (S3AFileStatus) getFileStatus(path); + if (fileStatus.isFile()) { + // simple case: File + LOG.debug("Path is a file"); + return new Listing.SingleStatusRemoteIterator( + filter.accept(path) ? 
toLocatedFileStatus(fileStatus) : null); + } else { + // directory: trigger a lookup + final String key = maybeAddTrailingSlash(pathToKey(path)); + final Listing.FileStatusAcceptor acceptor = + new Listing.AcceptAllButSelfAndS3nDirs(path); + DirListingMetadata meta = + S3Guard.listChildrenWithTtl(metadataStore, path, + ttlTimeProvider); + final RemoteIterator cachedFileStatusIterator = + listing.createProvidedFileStatusIterator( + S3Guard.dirMetaToStatuses(meta), filter, acceptor); + return (allowAuthoritative && meta != null + && meta.isAuthoritative()) + ? listing.createLocatedFileStatusIterator( + cachedFileStatusIterator) + : listing.createLocatedFileStatusIterator( + listing.createFileStatusListingIterator(path, + createListObjectsRequest(key, "/"), + filter, + acceptor, + cachedFileStatusIterator)); + } + }); + return toLocatedFileStatusIterator(iterator); } /** - * Build a {@link LocatedFileStatus} from a {@link FileStatus} instance. + * Build a {@link S3ALocatedFileStatus} from a {@link FileStatus} instance. * @param status file status * @return a located status with block locations set up from this FS. * @throws IOException IO Problems. */ - LocatedFileStatus toLocatedFileStatus(FileStatus status) + S3ALocatedFileStatus toLocatedFileStatus(S3AFileStatus status) throws IOException { - return new LocatedFileStatus(status, + return new S3ALocatedFileStatus(status, status.isFile() ? getFileBlockLocations(status, 0, status.getLen()) - : null); + : null, status.getETag(), status.getVersionId()); } /** @@ -3743,17 +3891,38 @@ private FSDataInputStream select(final Path source, // so the operation will fail if it is not there or S3Guard believes it has // been deleted. // validation of the file status are delegated to the binding. - final FileStatus fileStatus = getFileStatus(path); + final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path); // readahead range can be dynamically set long ra = options.getLong(READAHEAD_RANGE, readAhead); + S3ObjectAttributes objectAttributes = createObjectAttributes( + path, fileStatus.getETag(), fileStatus.getVersionId()); + S3AReadOpContext readContext = createReadContext(fileStatus, inputPolicy, + changeDetectionPolicy, ra); + + if (!fileStatus.isDirectory()) { + // check that the object metadata lines up with what is expected + // based on the object attributes (which may contain an eTag or + // versionId) from S3Guard + ChangeTracker changeTracker = + new ChangeTracker(uri.toString(), + changeDetectionPolicy, + readContext.instrumentation.newInputStreamStatistics() + .getVersionMismatchCounter(), + objectAttributes); + + // will retry internally if wrong version detected + Invoker readInvoker = readContext.getReadInvoker(); + getObjectMetadata(path, changeTracker, readInvoker, "select"); + } + // build and execute the request return selectBinding.select( - createReadContext(fileStatus, inputPolicy, changeDetectionPolicy, ra), + readContext, expression, options, generateSSECustomerKey(), - createObjectAttributes(path)); + objectAttributes); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java index cbe796bdcd..6e7a2511f6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java @@ -153,7 +153,8 @@ public S3AInputStream(S3AReadOpContext ctx, this.serverSideEncryptionKey = 
s3Attributes.getServerSideEncryptionKey(); this.changeTracker = new ChangeTracker(uri, ctx.getChangeDetectionPolicy(), - streamStatistics.getVersionMismatchCounter()); + streamStatistics.getVersionMismatchCounter(), + s3Attributes); setInputPolicy(ctx.getInputPolicy()); setReadahead(ctx.getReadahead()); } @@ -344,9 +345,12 @@ public boolean seekToNewSource(long targetPos) throws IOException { private void lazySeek(long targetPos, long len) throws IOException { // With S3Guard, the metadatastore gave us metadata for the file in - // open(), so we use a slightly different retry policy. + // open(), so we use a slightly different retry policy, but only on initial + // open. After that, an exception generally means the file has changed + // and there is no point retrying anymore. Invoker invoker = context.getReadInvoker(); - invoker.retry("lazySeek", pathStr, true, + invoker.maybeRetry(streamStatistics.openOperations == 0, + "lazySeek", pathStr, true, () -> { //For lazy seek seekInStream(targetPos, len); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ALocatedFileStatus.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ALocatedFileStatus.java new file mode 100644 index 0000000000..d3ca2610e2 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ALocatedFileStatus.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.LocatedFileStatus; + +import static com.google.common.base.Preconditions.checkNotNull; + +/** + * {@link LocatedFileStatus} extended to also carry ETag and object version ID. + */ +public class S3ALocatedFileStatus extends LocatedFileStatus { + + private static final long serialVersionUID = 3597192103662929338L; + + private final String eTag; + private final String versionId; + + public S3ALocatedFileStatus(S3AFileStatus status, BlockLocation[] locations, + String eTag, String versionId) { + super(checkNotNull(status), locations); + this.eTag = eTag; + this.versionId = versionId; + } + + public String getETag() { + return eTag; + } + + public String getVersionId() { + return versionId; + } + + // equals() and hashCode() overridden to avoid FindBugs warning. + // Base implementation is equality on Path only, which is still appropriate. + + @Override + public boolean equals(Object o) { + return super.equals(o); + } + + @Override + public int hashCode() { + return super.hashCode(); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index 85181c3af8..4d9fc3292f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -533,16 +533,20 @@ public static String stringify(AmazonS3Exception e) { * @param summary summary from AWS * @param blockSize block size to declare. * @param owner owner of the file + * @param eTag S3 object eTag or null if unavailable + * @param versionId S3 object versionId or null if unavailable * @return a status entry */ public static S3AFileStatus createFileStatus(Path keyPath, S3ObjectSummary summary, long blockSize, - String owner) { + String owner, + String eTag, + String versionId) { long size = summary.getSize(); return createFileStatus(keyPath, objectRepresentsDirectory(summary.getKey(), size), - size, summary.getLastModified(), blockSize, owner); + size, summary.getLastModified(), blockSize, owner, eTag, versionId); } /** @@ -555,22 +559,27 @@ public static S3AFileStatus createFileStatus(Path keyPath, * @param size file length * @param blockSize block size for file status * @param owner Hadoop username + * @param eTag S3 object eTag or null if unavailable + * @param versionId S3 object versionId or null if unavailable * @return a status entry */ public static S3AFileStatus createUploadFileStatus(Path keyPath, - boolean isDir, long size, long blockSize, String owner) { + boolean isDir, long size, long blockSize, String owner, + String eTag, String versionId) { Date date = isDir ? null : new Date(); - return createFileStatus(keyPath, isDir, size, date, blockSize, owner); + return createFileStatus(keyPath, isDir, size, date, blockSize, owner, + eTag, versionId); } /* Date 'modified' is ignored when isDir is true. 
*/ private static S3AFileStatus createFileStatus(Path keyPath, boolean isDir, - long size, Date modified, long blockSize, String owner) { + long size, Date modified, long blockSize, String owner, + String eTag, String versionId) { if (isDir) { return new S3AFileStatus(Tristate.UNKNOWN, keyPath, owner); } else { return new S3AFileStatus(size, dateToLong(modified), keyPath, blockSize, - owner); + owner, eTag, versionId); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3GuardExistsRetryPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3GuardExistsRetryPolicy.java index 023d0c3cf2..1a0135bb9b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3GuardExistsRetryPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3GuardExistsRetryPolicy.java @@ -42,6 +42,7 @@ public S3GuardExistsRetryPolicy(Configuration conf) { protected Map, RetryPolicy> createExceptionMap() { Map, RetryPolicy> b = super.createExceptionMap(); b.put(FileNotFoundException.class, retryIdempotentCalls); + b.put(RemoteFileChangedException.class, retryIdempotentCalls); return b; } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java index d67e3e1e8c..2e62ff6728 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java @@ -34,16 +34,22 @@ public class S3ObjectAttributes { private final String key; private final S3AEncryptionMethods serverSideEncryptionAlgorithm; private final String serverSideEncryptionKey; + private final String eTag; + private final String versionId; public S3ObjectAttributes( String bucket, String key, S3AEncryptionMethods serverSideEncryptionAlgorithm, - String serverSideEncryptionKey) { + String serverSideEncryptionKey, + String eTag, + String versionId) { this.bucket = bucket; this.key = key; this.serverSideEncryptionAlgorithm = serverSideEncryptionAlgorithm; this.serverSideEncryptionKey = serverSideEncryptionKey; + this.eTag = eTag; + this.versionId = versionId; } public String getBucket() { @@ -61,4 +67,12 @@ public S3AEncryptionMethods getServerSideEncryptionAlgorithm() { public String getServerSideEncryptionKey() { return serverSideEncryptionKey; } + + public String getETag() { + return eTag; + } + + public String getVersionId() { + return versionId; + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java index ea091720c2..54386addd7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java @@ -226,7 +226,8 @@ public String initiateMultiPartUpload(String destKey) throws IOException { /** * Finalize a multipart PUT operation. * This completes the upload, and, if that works, calls - * {@link S3AFileSystem#finishedWrite(String, long)} to update the filesystem. + * {@link S3AFileSystem#finishedWrite(String, long, String, String)} + * to update the filesystem. * Retry policy: retrying, translated. 
* @param destKey destination of the commit * @param uploadId multipart operation Id @@ -247,20 +248,22 @@ private CompleteMultipartUploadResult finalizeMultipartUpload( throw new IOException( "No upload parts in multipart upload to " + destKey); } - CompleteMultipartUploadResult uploadResult = invoker.retry("Completing multipart commit", destKey, - true, - retrying, - () -> { - // a copy of the list is required, so that the AWS SDK doesn't - // attempt to sort an unmodifiable list. - return owner.getAmazonS3Client().completeMultipartUpload( - new CompleteMultipartUploadRequest(bucket, - destKey, - uploadId, - new ArrayList<>(partETags))); - } + CompleteMultipartUploadResult uploadResult = + invoker.retry("Completing multipart commit", destKey, + true, + retrying, + () -> { + // a copy of the list is required, so that the AWS SDK doesn't + // attempt to sort an unmodifiable list. + return owner.getAmazonS3Client().completeMultipartUpload( + new CompleteMultipartUploadRequest(bucket, + destKey, + uploadId, + new ArrayList<>(partETags))); + } ); - owner.finishedWrite(destKey, length); + owner.finishedWrite(destKey, length, uploadResult.getETag(), + uploadResult.getVersionId()); return uploadResult; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java index f3d8bc20c8..b0e9d6f454 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeDetectionPolicy.java @@ -20,8 +20,11 @@ import java.util.Locale; +import com.amazonaws.services.s3.model.CopyObjectRequest; +import com.amazonaws.services.s3.model.GetObjectMetadataRequest; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.transfer.model.CopyResult; import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,6 +33,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.S3ObjectAttributes; import org.apache.hadoop.fs.s3a.RemoteFileChangedException; import static org.apache.hadoop.fs.s3a.Constants.*; @@ -47,7 +51,7 @@ public abstract class ChangeDetectionPolicy { LoggerFactory.getLogger(ChangeDetectionPolicy.class); @VisibleForTesting - public static final String CHANGE_DETECTED = "change detected"; + public static final String CHANGE_DETECTED = "change detected on client"; private final Mode mode; private final boolean requireVersion; @@ -200,6 +204,28 @@ public static ChangeDetectionPolicy createPolicy(final Mode mode, public abstract String getRevisionId(ObjectMetadata objectMetadata, String uri); + /** + * Like {@link #getRevisionId(ObjectMetadata, String)}, but retrieves the + * revision identifier from {@link S3ObjectAttributes}. + * + * @param s3Attributes the object attributes + * @return the revisionId string as interpreted by this policy, or potentially + * null if the attribute is unavailable (such as when the policy says to use + * versionId but object versioning is not enabled for the bucket). 
+ */ + public abstract String getRevisionId(S3ObjectAttributes s3Attributes); + + /** + * Like {@link #getRevisionId(ObjectMetadata, String)}, but retrieves the + * revision identifier from {@link CopyResult}. + * + * @param copyResult the copy result + * @return the revisionId string as interpreted by this policy, or potentially + * null if the attribute is unavailable (such as when the policy says to use + * versionId but object versioning is not enabled for the bucket). + */ + public abstract String getRevisionId(CopyResult copyResult); + /** * Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId} * as a server-side qualification on the {@code GetObjectRequest}. @@ -210,6 +236,26 @@ public abstract String getRevisionId(ObjectMetadata objectMetadata, public abstract void applyRevisionConstraint(GetObjectRequest request, String revisionId); + /** + * Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId} + * as a server-side qualification on the {@code CopyObjectRequest}. + * + * @param request the request + * @param revisionId the revision id + */ + public abstract void applyRevisionConstraint(CopyObjectRequest request, + String revisionId); + + /** + * Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId} + * as a server-side qualification on the {@code GetObjectMetadataRequest}. + * + * @param request the request + * @param revisionId the revision id + */ + public abstract void applyRevisionConstraint(GetObjectMetadataRequest request, + String revisionId); + /** * Takes appropriate action based on {@link #getMode() mode} when a change has * been detected. @@ -234,6 +280,7 @@ public ImmutablePair<Boolean, RemoteFileChangedException> onChangeDetected( long position, String operation, long timesAlreadyDetected) { + String positionText = position >= 0 ? (" at " + position) : ""; switch (mode) { case None: // something changed; we don't care. @@ -242,8 +289,9 @@ public ImmutablePair<Boolean, RemoteFileChangedException> onChangeDetected( if (timesAlreadyDetected == 0) { // only warn on the first detection to avoid a noisy log LOG.warn( - String.format("%s change detected on %s %s at %d. Expected %s got %s", - getSource(), operation, uri, position, revisionId, + String.format( + "%s change detected on %s %s%s. Expected %s got %s", + getSource(), operation, uri, positionText, revisionId, newRevisionId)); return new ImmutablePair<>(true, null); } @@ -251,15 +299,16 @@ public ImmutablePair<Boolean, RemoteFileChangedException> onChangeDetected( case Client: case Server: default: - // mode == Client (or Server, but really won't be called for Server) + // mode == Client or Server; will trigger on version failures + // of getObjectMetadata even on server. return new ImmutablePair<>(true, new RemoteFileChangedException(uri, operation, String.format("%s " + CHANGE_DETECTED - + " while reading at position %s." + + " during %s%s." 
+ " Expected %s got %s", - getSource(), position, revisionId, newRevisionId))); + getSource(), operation, positionText, revisionId, newRevisionId))); } } @@ -277,11 +326,38 @@ public String getRevisionId(ObjectMetadata objectMetadata, String uri) { return objectMetadata.getETag(); } + @Override + public String getRevisionId(S3ObjectAttributes s3Attributes) { + return s3Attributes.getETag(); + } + + @Override + public String getRevisionId(CopyResult copyResult) { + return copyResult.getETag(); + } + @Override public void applyRevisionConstraint(GetObjectRequest request, String revisionId) { - LOG.debug("Restricting request to etag {}", revisionId); - request.withMatchingETagConstraint(revisionId); + if (revisionId != null) { + LOG.debug("Restricting get request to etag {}", revisionId); + request.withMatchingETagConstraint(revisionId); + } + } + + @Override + public void applyRevisionConstraint(CopyObjectRequest request, + String revisionId) { + if (revisionId != null) { + LOG.debug("Restricting copy request to etag {}", revisionId); + request.withMatchingETagConstraint(revisionId); + } + } + + @Override + public void applyRevisionConstraint(GetObjectMetadataRequest request, + String revisionId) { + // GetObjectMetadataRequest doesn't support eTag qualification } @Override @@ -323,11 +399,41 @@ public String getRevisionId(ObjectMetadata objectMetadata, String uri) { return versionId; } + @Override + public String getRevisionId(S3ObjectAttributes s3Attributes) { + return s3Attributes.getVersionId(); + } + + @Override + public String getRevisionId(CopyResult copyResult) { + return copyResult.getVersionId(); + } + @Override public void applyRevisionConstraint(GetObjectRequest request, String revisionId) { - LOG.debug("Restricting request to version {}", revisionId); - request.withVersionId(revisionId); + if (revisionId != null) { + LOG.debug("Restricting get request to version {}", revisionId); + request.withVersionId(revisionId); + } + } + + @Override + public void applyRevisionConstraint(CopyObjectRequest request, + String revisionId) { + if (revisionId != null) { + LOG.debug("Restricting copy request to version {}", revisionId); + request.withSourceVersionId(revisionId); + } + } + + @Override + public void applyRevisionConstraint(GetObjectMetadataRequest request, + String revisionId) { + if (revisionId != null) { + LOG.debug("Restricting metadata request to version {}", revisionId); + request.withVersionId(revisionId); + } } @Override @@ -361,12 +467,34 @@ public String getRevisionId(final ObjectMetadata objectMetadata, return null; } + @Override + public String getRevisionId(final S3ObjectAttributes s3ObjectAttributes) { + return null; + } + + @Override + public String getRevisionId(CopyResult copyResult) { + return null; + } + @Override public void applyRevisionConstraint(final GetObjectRequest request, final String revisionId) { } + @Override + public void applyRevisionConstraint(CopyObjectRequest request, + String revisionId) { + + } + + @Override + public void applyRevisionConstraint(GetObjectMetadataRequest request, + String revisionId) { + + } + @Override public String toString() { return "NoChangeDetection"; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java index f76602b953..75fecd5f14 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java +++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java @@ -20,11 +20,15 @@ import java.util.concurrent.atomic.AtomicLong; +import com.amazonaws.AmazonServiceException; +import com.amazonaws.SdkBaseException; +import com.amazonaws.services.s3.model.CopyObjectRequest; +import com.amazonaws.services.s3.model.GetObjectMetadataRequest; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.services.s3.transfer.model.CopyResult; import com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.fs.s3a.NoVersionAttributeException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,14 +36,18 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.s3a.NoVersionAttributeException; import org.apache.hadoop.fs.s3a.RemoteFileChangedException; +import org.apache.hadoop.fs.s3a.S3ObjectAttributes; import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.http.HttpStatus.SC_PRECONDITION_FAILED; /** - * Change tracking for input streams: the revision ID/etag - * the previous request is recorded and when the next request comes in, - * it is compared. + * Change tracking for input streams: the version ID or etag of the object is + * tracked and compared on open/re-open. An initial version ID or etag may or + * may not be available, depending on usage (e.g. if S3Guard is utilized). + * * Self-contained for testing and use in different streams. */ @InterfaceAudience.Private @@ -49,7 +57,7 @@ public class ChangeTracker { private static final Logger LOG = LoggerFactory.getLogger(ChangeTracker.class); - public static final String CHANGE_REPORTED_BY_S3 = "reported by S3"; + public static final String CHANGE_REPORTED_BY_S3 = "Change reported by S3"; /** Policy to use. */ private final ChangeDetectionPolicy policy; @@ -76,13 +84,20 @@ public class ChangeTracker { * @param uri URI of object being tracked * @param policy policy to track. * @param versionMismatches reference to the version mismatch counter + * @param s3ObjectAttributes attributes of the object, potentially including + * an eTag or versionId to match depending on {@code policy} */ public ChangeTracker(final String uri, final ChangeDetectionPolicy policy, - final AtomicLong versionMismatches) { + final AtomicLong versionMismatches, + final S3ObjectAttributes s3ObjectAttributes) { this.policy = checkNotNull(policy); this.uri = uri; this.versionMismatches = versionMismatches; + this.revisionId = policy.getRevisionId(s3ObjectAttributes); + if (revisionId != null) { + LOG.debug("Revision ID for object at {}: {}", uri, revisionId); + } } public String getRevisionId() { @@ -115,6 +130,33 @@ public boolean maybeApplyConstraint( return false; } + /** + * Apply any revision control set by the policy if it is to be + * enforced on the server. + * @param request request to modify + * @return true iff a constraint was added. 
+ */ + public boolean maybeApplyConstraint( + final CopyObjectRequest request) { + + if (policy.getMode() == ChangeDetectionPolicy.Mode.Server + && revisionId != null) { + policy.applyRevisionConstraint(request, revisionId); + return true; + } + return false; + } + + public boolean maybeApplyConstraint( + final GetObjectMetadataRequest request) { + + if (policy.getMode() == ChangeDetectionPolicy.Mode.Server + && revisionId != null) { + policy.applyRevisionConstraint(request, revisionId); + return true; + } + return false; + } /** * Process the response from the server for validation against the @@ -135,29 +177,106 @@ public void processResponse(final S3Object object, // object was not returned. versionMismatches.incrementAndGet(); throw new RemoteFileChangedException(uri, operation, - String.format("%s change " - + CHANGE_REPORTED_BY_S3 - + " while reading" + String.format(CHANGE_REPORTED_BY_S3 + + " during %s" + " at position %s." - + " Version %s was unavailable", - getSource(), + + " %s %s was unavailable", + operation, pos, + getSource(), getRevisionId())); } else { throw new PathIOException(uri, "No data returned from GET request"); } } - final ObjectMetadata metadata = object.getObjectMetadata(); + processMetadata(object.getObjectMetadata(), operation); + } + + /** + * Process the response from the server for validation against the + * change policy. + * @param copyResult result of a copy operation + * @throws PathIOException raised on failure + * @throws RemoteFileChangedException if the remote file has changed. + */ + public void processResponse(final CopyResult copyResult) + throws PathIOException { + // ETag (sometimes, depending on encryption and/or multipart) is not the + // same on the copied object as the original. Version Id seems to never + // be the same on the copy. As such, there isn't really anything that + // can be verified on the response, except that a revision ID is present + // if required. + String newRevisionId = policy.getRevisionId(copyResult); + LOG.debug("Copy result {}: {}", policy.getSource(), newRevisionId); + if (newRevisionId == null && policy.isRequireVersion()) { + throw new NoVersionAttributeException(uri, String.format( + "Change detection policy requires %s", + policy.getSource())); + } + } + + /** + * Process an exception generated against the change policy. + * If the exception indicates the file has changed, this method throws + * {@code RemoteFileChangedException} with the original exception as the + * cause. + * @param e the exception + * @param operation the operation performed when the exception was + * generated (e.g. "copy", "read", "select"). + * @throws RemoteFileChangedException if the remote file has changed. + */ + public void processException(SdkBaseException e, String operation) throws + RemoteFileChangedException { + if (e instanceof AmazonServiceException) { + AmazonServiceException serviceException = (AmazonServiceException) e; + // This isn't really going to be hit due to + // https://github.com/aws/aws-sdk-java/issues/1644 + if (serviceException.getStatusCode() == SC_PRECONDITION_FAILED) { + versionMismatches.incrementAndGet(); + throw new RemoteFileChangedException(uri, operation, String.format( + RemoteFileChangedException.PRECONDITIONS_FAILED + + " on %s." + + " Version %s was unavailable", + getSource(), + getRevisionId()), + serviceException); + } + } + } + + /** + * Process metadata response from server for validation against the change + * policy. 
+ * @param metadata metadata returned from server + * @param operation operation in progress + * @throws PathIOException raised on failure + * @throws RemoteFileChangedException if the remote file has changed. + */ + public void processMetadata(final ObjectMetadata metadata, + final String operation) throws PathIOException { final String newRevisionId = policy.getRevisionId(metadata, uri); + processNewRevision(newRevisionId, operation, -1); + } + + /** + * Validate a revision from the server against our expectations. + * @param newRevisionId new revision. + * @param operation operation in progress + * @param pos offset in the file; -1 for "none" + * @throws PathIOException raised on failure + * @throws RemoteFileChangedException if the remote file has changed. + */ + private void processNewRevision(final String newRevisionId, + final String operation, final long pos) throws PathIOException { if (newRevisionId == null && policy.isRequireVersion()) { throw new NoVersionAttributeException(uri, String.format( "Change detection policy requires %s", policy.getSource())); } if (revisionId == null) { - // revisionId is null on first (re)open. Pin it so change can be detected - // if object has been updated + // revisionId may be null on first (re)open. Pin it so change can be + // detected if object has been updated LOG.debug("Setting revision ID for object at {}: {}", uri, newRevisionId); revisionId = newRevisionId; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java new file mode 100644 index 0000000000..16459ac45b --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyOutcome.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import com.amazonaws.SdkBaseException; +import com.amazonaws.services.s3.transfer.Copy; +import com.amazonaws.services.s3.transfer.model.CopyResult; + +/** + * Extracts the outcome of a TransferManager-executed copy operation. + */ +public final class CopyOutcome { + + /** + * Result of a successful copy. + */ + private final CopyResult copyResult; + + /** the copy was interrupted. */ + private final InterruptedException interruptedException; + + /** + * The copy raised an AWS Exception of some form. 
*/ + private final SdkBaseException awsException; + + public CopyOutcome(CopyResult copyResult, + InterruptedException interruptedException, + SdkBaseException awsException) { + this.copyResult = copyResult; + this.interruptedException = interruptedException; + this.awsException = awsException; + } + + public CopyResult getCopyResult() { + return copyResult; + } + + public InterruptedException getInterruptedException() { + return interruptedException; + } + + public SdkBaseException getAwsException() { + return awsException; + } + + /** + * Calls {@code Copy.waitForCopyResult()} to await the result, converts + * it to a copy outcome. + * Exceptions are caught and returned in the relevant outcome field + * rather than thrown. + * @param copy the copy operation. + * @return the outcome. + */ + public static CopyOutcome waitForCopy(Copy copy) { + try { + CopyResult result = copy.waitForCopyResult(); + return new CopyOutcome(result, null, null); + } catch (SdkBaseException e) { + return new CopyOutcome(null, null, e); + } catch (InterruptedException e) { + return new CopyOutcome(null, e, null); + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DDBPathMetadata.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DDBPathMetadata.java index 78568dc4bb..e3a529ac14 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DDBPathMetadata.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DDBPathMetadata.java @@ -18,7 +18,7 @@ package org.apache.hadoop.fs.s3a.s3guard; -import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.Tristate; /** @@ -36,18 +36,18 @@ public DDBPathMetadata(PathMetadata pmd) { this.setLastUpdated(pmd.getLastUpdated()); } - public DDBPathMetadata(FileStatus fileStatus) { + public DDBPathMetadata(S3AFileStatus fileStatus) { super(fileStatus); this.isAuthoritativeDir = false; } - public DDBPathMetadata(FileStatus fileStatus, Tristate isEmptyDir, + public DDBPathMetadata(S3AFileStatus fileStatus, Tristate isEmptyDir, boolean isDeleted) { super(fileStatus, isEmptyDir, isDeleted); this.isAuthoritativeDir = false; } - public DDBPathMetadata(FileStatus fileStatus, Tristate isEmptyDir, + public DDBPathMetadata(S3AFileStatus fileStatus, Tristate isEmptyDir, boolean isDeleted, boolean isAuthoritativeDir, long lastUpdated) { super(fileStatus, isEmptyDir, isDeleted); this.isAuthoritativeDir = isAuthoritativeDir; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java index dcee35824e..88a46745b1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java @@ -28,9 +28,9 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.S3AFileStatus; /** * {@code DescendantsIterator} is a {@link RemoteIterator} that implements @@ -83,7 +83,7 @@ */ @InterfaceAudience.Private @InterfaceStability.Evolving -public class DescendantsIterator implements RemoteIterator<FileStatus> { +public class DescendantsIterator implements RemoteIterator<S3AFileStatus> { private final MetadataStore metadataStore; 
private final Queue queue = new LinkedList<>(); @@ -121,7 +121,7 @@ public boolean hasNext() throws IOException { } @Override - public FileStatus next() throws IOException { + public S3AFileStatus next() throws IOException { if (!hasNext()) { throw new NoSuchElementException("No more descendants."); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java index 88f24aa984..1059dd1486 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DirListingMetadata.java @@ -34,6 +34,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.Tristate; /** @@ -61,7 +62,7 @@ public class DirListingMetadata extends ExpirableMetadata { * Create a directory listing metadata container. * * @param path Path of the directory. If this path has a host component, then - * all paths added later via {@link #put(FileStatus)} must also have + * all paths added later via {@link #put(S3AFileStatus)} must also have * the same host. * @param listing Entries in the directory. * @param isAuthoritative true iff listing is the full contents of the @@ -225,7 +226,7 @@ public void remove(Path childPath) { * @return true if the status was added or replaced with a new value. False * if the same FileStatus value was already present. */ - public boolean put(FileStatus childFileStatus) { + public boolean put(S3AFileStatus childFileStatus) { Preconditions.checkNotNull(childFileStatus, "childFileStatus must be non-null"); Path childPath = childStatusToPathKey(childFileStatus); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java index 769d3d4c4c..a9e1f33689 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java @@ -88,6 +88,7 @@ import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.Retries; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.S3AInstrumentation; import org.apache.hadoop.fs.s3a.S3AUtils; @@ -129,6 +130,14 @@ * This attribute is meaningful only to file items. *

  • optional long attribute revealing block size of the file. * This attribute is meaningful only to file items.
  • + *
  • optional string attribute tracking the s3 eTag of the file. + * May be absent if the metadata was entered with a version of S3Guard + * before this was tracked. + * This attribute is meaningful only to file items.
  • + *
  • optional string attribute tracking the s3 versionId of the file. + * May be absent if the metadata was entered with a version of S3Guard + * before this was tracked. + * This attribute is meaningful only to file items.
  • * * * The DynamoDB partition key is the parent, and the range key is the child. @@ -155,20 +164,20 @@ * This is persisted to a single DynamoDB table as: * *
    - * =========================================================================
    - * | parent                 | child | is_dir | mod_time | len |     ...    |
    - * =========================================================================
    - * | /bucket                | dir1  | true   |          |     |            |
    - * | /bucket/dir1           | dir2  | true   |          |     |            |
    - * | /bucket/dir1           | dir3  | true   |          |     |            |
    - * | /bucket/dir1/dir2      | file1 |        |   100    | 111 |            |
    - * | /bucket/dir1/dir2      | file2 |        |   200    | 222 |            |
    - * | /bucket/dir1/dir3      | dir4  | true   |          |     |            |
    - * | /bucket/dir1/dir3      | dir5  | true   |          |     |            |
    - * | /bucket/dir1/dir3/dir4 | file3 |        |   300    | 333 |            |
    - * | /bucket/dir1/dir3/dir5 | file4 |        |   400    | 444 |            |
    - * | /bucket/dir1/dir3      | dir6  | true   |          |     |            |
    - * =========================================================================
    + * ====================================================================================
    + * | parent                 | child | is_dir | mod_time | len | etag | ver_id |  ...  |
    + * ====================================================================================
    + * | /bucket                | dir1  | true   |          |     |      |        |       |
    + * | /bucket/dir1           | dir2  | true   |          |     |      |        |       |
    + * | /bucket/dir1           | dir3  | true   |          |     |      |        |       |
    + * | /bucket/dir1/dir2      | file1 |        |   100    | 111 | abc  |  mno   |       |
    + * | /bucket/dir1/dir2      | file2 |        |   200    | 222 | def  |  pqr   |       |
    + * | /bucket/dir1/dir3      | dir4  | true   |          |     |      |        |       |
    + * | /bucket/dir1/dir3      | dir5  | true   |          |     |      |        |       |
    + * | /bucket/dir1/dir3/dir4 | file3 |        |   300    | 333 | ghi  |  stu   |       |
    + * | /bucket/dir1/dir3/dir5 | file4 |        |   400    | 444 | jkl  |  vwx   |       |
    + * | /bucket/dir1/dir3      | dir6  | true   |          |     |      |        |       |
    + * ====================================================================================
      * 
    * * This choice of schema is efficient for read access patterns. @@ -618,16 +627,15 @@ private DDBPathMetadata innerGet(Path path, boolean wantEmptyDirectoryFlag) } /** - * Make a FileStatus object for a directory at given path. The FileStatus - * only contains what S3A needs, and omits mod time since S3A uses its own - * implementation which returns current system time. - * @param owner username of owner + * Make a S3AFileStatus object for a directory at given path. + * The FileStatus only contains what S3A needs, and omits mod time + * since S3A uses its own implementation which returns current system time. + * @param dirOwner username of owner * @param path path to dir - * @return new FileStatus + * @return new S3AFileStatus */ - private FileStatus makeDirStatus(String owner, Path path) { - return new FileStatus(0, true, 1, 0, 0, 0, null, - owner, null, path); + private S3AFileStatus makeDirStatus(String dirOwner, Path path) { + return new S3AFileStatus(Tristate.UNKNOWN, path, dirOwner); } @Override @@ -710,7 +718,7 @@ Collection completeAncestry( while (!parent.isRoot() && !ancestry.containsKey(parent)) { LOG.debug("auto-create ancestor path {} for child path {}", parent, path); - final FileStatus status = makeDirStatus(parent, username); + final S3AFileStatus status = makeDirStatus(parent, username); ancestry.put(parent, new DDBPathMetadata(status, Tristate.FALSE, false)); parent = parent.getParent(); @@ -915,7 +923,7 @@ Collection fullPathsToPut(DDBPathMetadata meta) while (path != null && !path.isRoot()) { final Item item = getConsistentItem(path); if (!itemExists(item)) { - final FileStatus status = makeDirStatus(path, username); + final S3AFileStatus status = makeDirStatus(path, username); metasToPut.add(new DDBPathMetadata(status, Tristate.FALSE, false, meta.isAuthoritativeDir(), meta.getLastUpdated())); path = path.getParent(); @@ -938,9 +946,8 @@ private boolean itemExists(Item item) { } /** Create a directory FileStatus using current system time as mod time. 
*/ - static FileStatus makeDirStatus(Path f, String owner) { - return new FileStatus(0, true, 1, 0, System.currentTimeMillis(), 0, - null, owner, owner, f); + static S3AFileStatus makeDirStatus(Path f, String owner) { + return new S3AFileStatus(Tristate.UNKNOWN, f, owner); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java index b8f9635dcd..9276388679 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.Tristate; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -231,7 +232,7 @@ public void move(Collection<Path> pathsToDelete, public void put(PathMetadata meta) throws IOException { Preconditions.checkNotNull(meta); - FileStatus status = meta.getFileStatus(); + S3AFileStatus status = meta.getFileStatus(); Path path = standardize(status.getPath()); synchronized (this) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java index 378d10980c..e4e76c50d6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreListFilesIterator.java @@ -33,9 +33,9 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.S3AFileStatus; /** * {@code MetadataStoreListFilesIterator} is a {@link RemoteIterator} that @@ -85,14 +85,14 @@ @InterfaceAudience.Private @InterfaceStability.Evolving public class MetadataStoreListFilesIterator implements - RemoteIterator<FileStatus> { + RemoteIterator<S3AFileStatus> { public static final Logger LOG = LoggerFactory.getLogger( MetadataStoreListFilesIterator.class); private final boolean allowAuthoritative; private final MetadataStore metadataStore; private final Set<Path> tombstones = new HashSet<>(); - private Iterator<FileStatus> leafNodesIterator = null; + private Iterator<S3AFileStatus> leafNodesIterator = null; public MetadataStoreListFilesIterator(MetadataStore ms, PathMetadata meta, boolean allowAuthoritative) throws IOException { @@ -104,7 +104,7 @@ public MetadataStoreListFilesIterator(MetadataStore ms, PathMetadata meta, private void prefetch(PathMetadata meta) throws IOException { final Queue<PathMetadata> queue = new LinkedList<>(); - final Collection<FileStatus> leafNodes = new ArrayList<>(); + final Collection<S3AFileStatus> leafNodes = new ArrayList<>(); if (meta != null) { final Path path = meta.getFileStatus().getPath(); @@ -121,7 +121,7 @@ private void prefetch(PathMetadata meta) throws IOException { while(!queue.isEmpty()) { PathMetadata nextMetadata = queue.poll(); - FileStatus nextStatus = nextMetadata.getFileStatus(); + S3AFileStatus nextStatus = nextMetadata.getFileStatus(); if (nextStatus.isFile()) { // All files are leaf nodes by definition leafNodes.add(nextStatus); @@ -159,7 +159,7 @@ public 
boolean hasNext() { } @Override - public FileStatus next() { + public S3AFileStatus next() { return leafNodesIterator.next(); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java index 56645fead7..8824439db7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadata.java @@ -21,8 +21,8 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.Tristate; /** @@ -33,7 +33,7 @@ @InterfaceStability.Evolving public class PathMetadata extends ExpirableMetadata { - private final FileStatus fileStatus; + private S3AFileStatus fileStatus; private Tristate isEmptyDirectory; private boolean isDeleted; @@ -43,24 +43,25 @@ public class PathMetadata extends ExpirableMetadata { * @return the entry. */ public static PathMetadata tombstone(Path path) { - long now = System.currentTimeMillis(); - FileStatus status = new FileStatus(0, false, 0, 0, now, path); - return new PathMetadata(status, Tristate.UNKNOWN, true); + S3AFileStatus s3aStatus = new S3AFileStatus(0, + System.currentTimeMillis(), path, 0, null, + null, null); + return new PathMetadata(s3aStatus, Tristate.UNKNOWN, true); } /** * Creates a new {@code PathMetadata} containing given {@code FileStatus}. * @param fileStatus file status containing an absolute path. */ - public PathMetadata(FileStatus fileStatus) { - this(fileStatus, Tristate.UNKNOWN); + public PathMetadata(S3AFileStatus fileStatus) { + this(fileStatus, Tristate.UNKNOWN, false); } - public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir) { + public PathMetadata(S3AFileStatus fileStatus, Tristate isEmptyDir) { this(fileStatus, isEmptyDir, false); } - public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir, boolean + public PathMetadata(S3AFileStatus fileStatus, Tristate isEmptyDir, boolean isDeleted) { Preconditions.checkNotNull(fileStatus, "fileStatus must be non-null"); Preconditions.checkNotNull(fileStatus.getPath(), "fileStatus path must be" + @@ -75,7 +76,7 @@ public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir, boolean /** * @return {@code FileStatus} contained in this {@code PathMetadata}. */ - public final FileStatus getFileStatus() { + public final S3AFileStatus getFileStatus() { return fileStatus; } @@ -91,6 +92,7 @@ public Tristate isEmptyDirectory() { void setIsEmptyDirectory(Tristate isEmptyDirectory) { this.isEmptyDirectory = isEmptyDirectory; + fileStatus.setIsEmptyDirectory(isEmptyDirectory); } public boolean isDeleted() { @@ -128,10 +130,11 @@ public String toString() { * @param sb target StringBuilder */ public void prettyPrint(StringBuilder sb) { - sb.append(String.format("%-5s %-20s %-7d %-8s %-6s", + sb.append(String.format("%-5s %-20s %-7d %-8s %-6s %-20s %-20s", fileStatus.isDirectory() ? 
"dir" : "file", fileStatus.getPath().toString(), fileStatus.getLen(), - isEmptyDirectory.name(), isDeleted)); + isEmptyDirectory.name(), isDeleted, + fileStatus.getETag(), fileStatus.getVersionId())); sb.append(fileStatus); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java index c6f70bf277..c9559ec151 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java @@ -40,9 +40,9 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.Constants; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.Tristate; /** @@ -70,6 +70,8 @@ final class PathMetadataDynamoDBTranslation { static final String IS_DELETED = "is_deleted"; static final String IS_AUTHORITATIVE = "is_authoritative"; static final String LAST_UPDATED = "last_updated"; + static final String ETAG = "etag"; + static final String VERSION_ID = "version_id"; /** Used while testing backward compatibility. */ @VisibleForTesting @@ -135,7 +137,7 @@ static DDBPathMetadata itemToPathMetadata(Item item, String username) { boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR); boolean isAuthoritativeDir = false; - final FileStatus fileStatus; + final S3AFileStatus fileStatus; long lastUpdated = 0; if (isDir) { isAuthoritativeDir = !IGNORED_FIELDS.contains(IS_AUTHORITATIVE) @@ -146,8 +148,10 @@ static DDBPathMetadata itemToPathMetadata(Item item, String username) { long len = item.hasAttribute(FILE_LENGTH) ? item.getLong(FILE_LENGTH) : 0; long modTime = item.hasAttribute(MOD_TIME) ? item.getLong(MOD_TIME) : 0; long block = item.hasAttribute(BLOCK_SIZE) ? 
item.getLong(BLOCK_SIZE) : 0; - fileStatus = new FileStatus(len, false, 1, block, modTime, 0, null, - username, username, path); + String eTag = item.getString(ETAG); + String versionId = item.getString(VERSION_ID); + fileStatus = new S3AFileStatus( + len, modTime, path, block, username, eTag, versionId); } lastUpdated = !IGNORED_FIELDS.contains(LAST_UPDATED) @@ -172,7 +176,7 @@ static DDBPathMetadata itemToPathMetadata(Item item, String username) { */ static Item pathMetadataToItem(DDBPathMetadata meta) { Preconditions.checkNotNull(meta); - final FileStatus status = meta.getFileStatus(); + final S3AFileStatus status = meta.getFileStatus(); final Item item = new Item().withPrimaryKey(pathToKey(status.getPath())); if (status.isDirectory()) { item.withBoolean(IS_DIR, true); @@ -183,6 +187,12 @@ static Item pathMetadataToItem(DDBPathMetadata meta) { item.withLong(FILE_LENGTH, status.getLen()) .withLong(MOD_TIME, status.getModificationTime()) .withLong(BLOCK_SIZE, status.getBlockSize()); + if (status.getETag() != null) { + item.withString(ETAG, status.getETag()); + } + if (status.getVersionId() != null) { + item.withString(VERSION_ID, status.getVersionId()); + } } item.withBoolean(IS_DELETED, meta.isDeleted()); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java index 8234777c3b..26c75e8213 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java @@ -68,7 +68,7 @@ public final class S3Guard { static final Class S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT = DynamoDBClientFactory.DefaultDynamoDBClientFactory.class; - private static final FileStatus[] EMPTY_LISTING = new FileStatus[0]; + private static final S3AFileStatus[] EMPTY_LISTING = new S3AFileStatus[0]; // Utility class. All static functions. 
private S3Guard() { } @@ -164,7 +164,7 @@ public static S3AFileStatus putAndReturn(MetadataStore ms, * @param dirMeta directory listing -may be null * @return a possibly-empty array of file status entries */ - public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) { + public static S3AFileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) { if (dirMeta == null) { return EMPTY_LISTING; } @@ -178,7 +178,7 @@ public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) { } } - return statuses.toArray(new FileStatus[0]); + return statuses.toArray(new S3AFileStatus[0]); } /** @@ -201,7 +201,7 @@ public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) { * @throws IOException if metadata store update failed */ public static FileStatus[] dirListingUnion(MetadataStore ms, Path path, - List backingStatuses, DirListingMetadata dirMeta, + List backingStatuses, DirListingMetadata dirMeta, boolean isAuthoritative, ITtlTimeProvider timeProvider) throws IOException { @@ -232,7 +232,7 @@ public static FileStatus[] dirListingUnion(MetadataStore ms, Path path, pm -> pm.getFileStatus().getPath(), PathMetadata::getFileStatus) ); - for (FileStatus s : backingStatuses) { + for (S3AFileStatus s : backingStatuses) { if (deleted.contains(s.getPath())) { continue; } @@ -323,7 +323,7 @@ public static void makeDirsOrdered(MetadataStore ms, List dirs, * [/a/b/file0, /a/b/file1, /a/b/file2, /a/b/file3], isAuthoritative = * true */ - FileStatus prevStatus = null; + S3AFileStatus prevStatus = null; // Use new batched put to reduce round trips. List pathMetas = new ArrayList<>(dirs.size()); @@ -334,8 +334,8 @@ public static void makeDirsOrdered(MetadataStore ms, List dirs, boolean isLeaf = (prevStatus == null); Path f = dirs.get(i); assertQualified(f); - FileStatus status = - createUploadFileStatus(f, true, 0, 0, owner); + S3AFileStatus status = + createUploadFileStatus(f, true, 0, 0, owner, null, null); // We only need to put a DirListingMetadata if we are setting // authoritative bit @@ -383,7 +383,8 @@ public static void addMoveDir(MetadataStore ms, Collection srcPaths, } assertQualified(srcPath, dstPath); - FileStatus dstStatus = createUploadFileStatus(dstPath, true, 0, 0, owner); + S3AFileStatus dstStatus = createUploadFileStatus(dstPath, true, 0, + 0, owner, null, null); addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus); } @@ -399,16 +400,18 @@ public static void addMoveDir(MetadataStore ms, Collection srcPaths, * @param size length of file moved * @param blockSize blocksize to associate with destination file * @param owner file owner to use in created records + * @param eTag the s3 object eTag of file moved + * @param versionId the s3 object versionId of file moved */ public static void addMoveFile(MetadataStore ms, Collection srcPaths, Collection dstMetas, Path srcPath, Path dstPath, - long size, long blockSize, String owner) { + long size, long blockSize, String owner, String eTag, String versionId) { if (isNullMetadataStore(ms)) { return; } assertQualified(srcPath, dstPath); - FileStatus dstStatus = createUploadFileStatus(dstPath, false, - size, blockSize, owner); + S3AFileStatus dstStatus = createUploadFileStatus(dstPath, false, + size, blockSize, owner, eTag, versionId); addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus); } @@ -465,9 +468,8 @@ public static void addAncestors(MetadataStore metadataStore, while (!parent.isRoot()) { PathMetadata directory = metadataStore.get(parent); if (directory == null || directory.isDeleted()) { - FileStatus status = 
new FileStatus(0, true, 1, 0, 0, 0, null, username, - null, parent); - PathMetadata meta = new PathMetadata(status, Tristate.FALSE, false); + S3AFileStatus s3aStatus = new S3AFileStatus(Tristate.FALSE, parent, username); + PathMetadata meta = new PathMetadata(s3aStatus, Tristate.FALSE, false); newDirs.add(meta); } else { break; @@ -480,7 +482,7 @@ public static void addAncestors(MetadataStore metadataStore, private static void addMoveStatus(Collection srcPaths, Collection dstMetas, Path srcPath, - FileStatus dstStatus) { + S3AFileStatus dstStatus) { srcPaths.add(srcPath); dstMetas.add(new PathMetadata(dstStatus)); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index 1ac167f5a6..448ea9213f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -44,12 +44,12 @@ import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.s3a.MultipartUtils; import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus; import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens; import org.apache.hadoop.fs.s3a.commit.CommitConstants; @@ -703,7 +703,7 @@ private void putParentsIfNotPresent(FileStatus f) throws IOException { if (dirCache.contains(parent)) { return; } - FileStatus dir = DynamoDBMetadataStore.makeDirStatus(parent, + S3AFileStatus dir = DynamoDBMetadataStore.makeDirStatus(parent, f.getOwner()); getStore().put(new PathMetadata(dir)); dirCache.add(parent); @@ -718,13 +718,13 @@ private void putParentsIfNotPresent(FileStatus f) throws IOException { */ private long importDir(FileStatus status) throws IOException { Preconditions.checkArgument(status.isDirectory()); - RemoteIterator it = getFilesystem() + RemoteIterator it = getFilesystem() .listFilesAndEmptyDirectories(status.getPath(), true); long items = 0; while (it.hasNext()) { - LocatedFileStatus located = it.next(); - FileStatus child; + S3ALocatedFileStatus located = it.next(); + S3AFileStatus child; if (located.isDirectory()) { child = DynamoDBMetadataStore.makeDirStatus(located.getPath(), located.getOwner()); @@ -734,7 +734,9 @@ private long importDir(FileStatus status) throws IOException { located.getModificationTime(), located.getPath(), located.getBlockSize(), - located.getOwner()); + located.getOwner(), + located.getETag(), + located.getVersionId()); } putParentsIfNotPresent(child); getStore().put(new PathMetadata(child)); @@ -761,7 +763,8 @@ public int run(String[] args, PrintStream out) throws Exception { filePath = "/"; } Path path = new Path(filePath); - FileStatus status = getFilesystem().getFileStatus(path); + S3AFileStatus status = (S3AFileStatus) getFilesystem() + .getFileStatus(path); try { initMetadataStore(false); @@ -1163,7 +1166,7 @@ public int run(String[] args, PrintStream out) magic ? 
"is" : "is not"); println(out, "%nS3A Client"); - + printOption(out, "\tSigning Algorithm", SIGNING_ALGORITHM, "(unset)"); String endpoint = conf.getTrimmed(ENDPOINT, ""); println(out, "\tEndpoint: %s=%s", ENDPOINT, @@ -1172,6 +1175,10 @@ public int run(String[] args, PrintStream out) printOption(out, "\tEncryption", SERVER_SIDE_ENCRYPTION_ALGORITHM, "none"); printOption(out, "\tInput seek policy", INPUT_FADVISE, INPUT_FADV_NORMAL); + printOption(out, "\tChange Detection Source", CHANGE_DETECT_SOURCE, + CHANGE_DETECT_SOURCE_DEFAULT); + printOption(out, "\tChange Detection Mode", CHANGE_DETECT_MODE, + CHANGE_DETECT_MODE_DEFAULT); // look at delegation token support if (fs.getDelegationTokens().isPresent()) { diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md index 09e123d6ed..ef9c999359 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md @@ -690,10 +690,15 @@ Filesystem s3a://landsat-pds is not using S3Guard The "magic" committer is not supported S3A Client - Endpoint: fs.s3a.endpoint=(unset) + Signing Algorithm: fs.s3a.signing-algorithm=(unset) + Endpoint: fs.s3a.endpoint=s3.amazonaws.com Encryption: fs.s3a.server-side-encryption-algorithm=none Input seek policy: fs.s3a.experimental.input.fadvise=normal -2017-09-27 19:18:57,917 INFO util.ExitUtil: Exiting with status 46: 46: The magic committer is not enabled for s3a://landsat-pds + Change Detection Source: fs.s3a.change.detection.source=etag + Change Detection Mode: fs.s3a.change.detection.mode=server +Delegation token support is disabled +2019-05-17 13:53:38,245 [main] INFO util.ExitUtil (ExitUtil.java:terminate(210)) - + Exiting with status 46: 46: The magic committer is not enabled for s3a://landsat-pds ``` ## Error message: "File being created has a magic path, but the filesystem has magic file support disabled: diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/delegation_tokens.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/delegation_tokens.md index 30226f85eb..aad3f355b2 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/delegation_tokens.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/delegation_tokens.md @@ -522,12 +522,15 @@ $ hadoop s3guard bucket-info s3a://landsat-pds/ Filesystem s3a://landsat-pds Location: us-west-2 Filesystem s3a://landsat-pds is not using S3Guard -The "magic" committer is supported +The "magic" committer is not supported S3A Client + Signing Algorithm: fs.s3a.signing-algorithm=(unset) Endpoint: fs.s3a.endpoint=s3.amazonaws.com Encryption: fs.s3a.server-side-encryption-algorithm=none Input seek policy: fs.s3a.experimental.input.fadvise=normal + Change Detection Source: fs.s3a.change.detection.source=etag + Change Detection Mode: fs.s3a.change.detection.mode=server Delegation Support enabled: token kind = S3ADelegationToken/Session Hadoop security mode: SIMPLE ``` diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md index bb09d576dc..a766abc616 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md @@ -644,9 +644,13 @@ Metadata Store Diagnostics: The "magic" committer is supported S3A Client + Signing 
Algorithm: fs.s3a.signing-algorithm=(unset) Endpoint: fs.s3a.endpoint=s3-eu-west-1.amazonaws.com Encryption: fs.s3a.server-side-encryption-algorithm=none Input seek policy: fs.s3a.experimental.input.fadvise=normal + Change Detection Source: fs.s3a.change.detection.source=etag + Change Detection Mode: fs.s3a.change.detection.mode=server +Delegation token support is disabled ``` This listing includes all the information about the table supplied from @@ -1097,6 +1101,111 @@ based on usage information collected from previous days, and choosing a combination of retry counts and an interval which allow for the clients to cope with some throttling, but not to time-out other applications. +## Read-After-Overwrite Consistency + +S3Guard provides read-after-overwrite consistency through ETags (default) or +object versioning, checked either on the server (default) or client. This means +that a reader opening a file after an overwrite either sees the new version of +the file or fails with an error. Without S3Guard, new readers may see the original +version; only once S3 becomes consistent will new readers see the new +version. + +Readers using S3Guard will usually see the new file version, but may +in rare cases see `RemoteFileChangedException` instead. This occurs when +an S3 object read cannot return the version tracked in the S3Guard metadata. + +S3Guard achieves this behavior by storing ETags and object version IDs in the +S3Guard metadata store (e.g. DynamoDB). On opening a file, S3AFileSystem +will look in S3 for the version of the file indicated by the ETag or object +version ID stored in the metadata store. If that version is unavailable, +`RemoteFileChangedException` is thrown. Whether ETag or version ID is used, and +whether the check happens on the server or the client, is determined by the +[fs.s3a.change.detection configuration options](./index.html#Handling_Read-During-Overwrite). + +### No Versioning Metadata Available + +When the first S3AFileSystem clients are upgraded to a version of +`S3AFileSystem` that contains these change tracking features, any existing +S3Guard metadata will not contain ETags or object version IDs. Reads of files +tracked in such S3Guard metadata will access whatever version of the file is +available in S3 at the time of read. Only once the file is subsequently updated +will S3Guard start tracking its ETag and object version ID, and only then can it +generate `RemoteFileChangedException` when an inconsistency is detected. + +Similarly, when S3Guard metadata is pruned, S3Guard will no longer be able to +detect an inconsistent read. S3Guard metadata should be retained for at least +as long as the perceived possible read-after-overwrite temporary inconsistency +window. That window is expected to be short, but there are no guarantees, so it +is at the administrator's discretion to weigh the risk. + +### Known Limitations + +#### S3 Select + +S3 Select does not provide a capability for server-side ETag or object +version ID qualification. Whether `fs.s3a.change.detection.mode` is "client" or +"server", S3Guard will cause a client-side check of the file version before +opening the file with S3 Select. If the current version does not match the +version tracked in S3Guard, `RemoteFileChangedException` is thrown. + +It is still possible for the S3 Select read to access a different version of +the file if the visible version changes between the check and the open, whether +through eventual consistency or through another overwrite in that window. 
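
Whichever mode is configured, an inconsistency ultimately surfaces to applications as `RemoteFileChangedException`, so callers reading through an S3Guard-enabled filesystem may want to treat it as a retriable condition. The following is a minimal, hypothetical sketch, not part of the S3A API: the helper name, the three attempts and the linear backoff are arbitrary choices made for this illustration.

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.RemoteFileChangedException;

/** Hypothetical reader treating change detection failures as retriable. */
public final class GuardedReader {

  private GuardedReader() {
  }

  public static byte[] readFully(Path path, Configuration conf)
      throws IOException, InterruptedException {
    FileSystem fs = path.getFileSystem(conf);
    for (int attempt = 1; attempt <= 3; attempt++) {
      // Assumes a file small enough to buffer in memory.
      byte[] buffer = new byte[(int) fs.getFileStatus(path).getLen()];
      try (FSDataInputStream in = fs.open(path)) {
        // S3A validates the tracked ETag/version ID whenever the underlying
        // S3 stream is opened or reopened; a mismatch surfaces here.
        in.readFully(0, buffer);
        return buffer;
      } catch (RemoteFileChangedException e) {
        // S3 served a version other than the one S3Guard tracks; back off
        // and retry in case eventual consistency catches up.
        Thread.sleep(1000L * attempt);
      }
    }
    throw new IOException("S3 never served the tracked version of " + path);
  }
}
```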
+ +#### Rename + +Rename is implemented via copy in S3. With `fs.s3a.change.detection.mode` set +to "client", a fully reliable mechanism for ensuring the copied content is the expected +content is not possible, since there isn't necessarily a way +to know the expected ETag or version ID to appear on the object resulting from +the copy. + +Furthermore, if `fs.s3a.change.detection.mode` is "server" and a third-party S3 +implementation is used that doesn't honor the provided ETag or version ID, +S3AFileSystem and S3Guard cannot detect it. + +When `fs.s3a.change.detection.mode` is "client", a client-side check +will be performed before the copy to ensure the current version of the file +matches S3Guard metadata. If not, `RemoteFileChangedException` is thrown. +As with S3 Select, this is not sufficient to +guarantee that the checked version is the version actually copied. + +When `fs.s3a.change.detection.mode` is "server", the expected version is also specified +in the underlying S3 `CopyObjectRequest`. As long as the server honors it, the +copied object will be correct. + +All this said, with the defaults of `fs.s3a.change.detection.mode` of "server" and +`fs.s3a.change.detection.source` of "etag", when working with Amazon's S3, copy should in fact +either copy the expected file version or, in the case of an eventual consistency +anomaly, generate `RemoteFileChangedException`. The same should be true when +`fs.s3a.change.detection.source` = "versionid". + +#### Out of Sync Metadata + +The S3Guard version tracking metadata (ETag or object version ID) could become +out of sync with the true current object metadata in S3. For example, S3Guard +is still tracking v1 of some file after v2 has been written. This could occur +when a writer bypasses S3Guard and/or S3AFileSystem, or when a write through +S3AFileSystem and S3Guard succeeded against S3 but failed in communication with +S3Guard's metadata store +(e.g. DynamoDB). + +If this happens, reads of the affected file(s) will result in +`RemoteFileChangedException` until one of: + +* the S3Guard metadata is corrected out-of-band +* the file is overwritten (causing an S3Guard metadata update) +* the S3Guard metadata is pruned + +The S3Guard metadata for a file can be corrected with the `s3guard import` +command as discussed above. The command can take a file URI instead of a +bucket URI to correct the metadata for a single file. For example: + +```bash +hadoop s3guard import [-meta URI] s3a://my-bucket/file-with-bad-metadata +``` + ## Troubleshooting ### Error: `S3Guard table lacks version marker.` @@ -1215,6 +1324,97 @@ sync. See [Fail on Error](#fail-on-error) for more detail. +### Error `RemoteFileChangedException` + +An exception like the one shown after this list could occur for a couple of +reasons (a diagnostic sketch follows the list): + +* the S3Guard metadata is out of sync with the true S3 metadata. For +example, the S3Guard DynamoDB table is tracking a different ETag than the ETag +shown in the exception. This may suggest the object was updated in S3 without +involvement from S3Guard or there was a transient failure when S3Guard tried to +write to DynamoDB. + +* S3 is exhibiting read-after-overwrite temporary inconsistency. The S3Guard +metadata was updated with a new ETag during a recent write, but the current read +is not seeing that ETag due to S3 eventual consistency. This exception prevents +the reader from an inconsistent read where the reader sees an older version of +the file. 
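
One way to narrow down which case applies is to HEAD the object directly and compare the ETag S3 currently serves against the ETag quoted in the exception: if it converges on the quoted value, the failure was transient eventual consistency; if it settles on a different value, the S3Guard entry is probably stale. Below is a hypothetical probe using the same AWS SDK v1 the connector builds against; the bucket, key and default client configuration are placeholder assumptions.

```java
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.ObjectMetadata;

/** Hypothetical diagnostic: print the ETag S3 currently serves. */
public final class EtagProbe {
  public static void main(String[] args) {
    String bucket = "my-bucket";    // placeholder
    String key = "test/file.txt";   // placeholder
    AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();
    // HEAD the object; compare the result against the ETag named in the
    // RemoteFileChangedException message over several probes.
    ObjectMetadata md = s3.getObjectMetadata(bucket, key);
    System.out.println("S3 currently reports ETag " + md.getETag());
  }
}
```

The exception itself looks like this: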
+ +``` +org.apache.hadoop.fs.s3a.RemoteFileChangedException: open 's3a://my-bucket/test/file.txt': + Change reported by S3 while reading at position 0. + ETag 4e886e26c072fef250cfaf8037675405 was unavailable + at org.apache.hadoop.fs.s3a.impl.ChangeTracker.processResponse(ChangeTracker.java:167) + at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:207) + at org.apache.hadoop.fs.s3a.S3AInputStream.lambda$lazySeek$1(S3AInputStream.java:355) + at org.apache.hadoop.fs.s3a.Invoker.lambda$retry$2(Invoker.java:195) + at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:109) + at org.apache.hadoop.fs.s3a.Invoker.lambda$retry$3(Invoker.java:265) + at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:322) + at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:261) + at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:193) + at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:215) + at org.apache.hadoop.fs.s3a.S3AInputStream.lazySeek(S3AInputStream.java:348) + at org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:381) + at java.io.FilterInputStream.read(FilterInputStream.java:83) +``` + +### Error `AWSClientIOException: copyFile` caused by `NullPointerException` + +The AWS SDK has an [issue](https://github.com/aws/aws-sdk-java/issues/1644) +where it will throw a relatively generic `AmazonClientException` caused by +`NullPointerException` when copying a file and specifying a precondition +that cannot be met. This can bubble up from `S3AFileSystem.rename()`. It +suggests that the file in S3 is inconsistent with the metadata in S3Guard. + +``` +org.apache.hadoop.fs.s3a.AWSClientIOException: copyFile(test/rename-eventually2.dat, test/dest2.dat) on test/rename-eventually2.dat: com.amazonaws.AmazonClientException: Unable to complete transfer: null: Unable to complete transfer: null + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:201) + at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:111) + at org.apache.hadoop.fs.s3a.Invoker.lambda$retry$4(Invoker.java:314) + at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:406) + at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:310) + at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:285) + at org.apache.hadoop.fs.s3a.S3AFileSystem.copyFile(S3AFileSystem.java:3034) + at org.apache.hadoop.fs.s3a.S3AFileSystem.innerRename(S3AFileSystem.java:1258) + at org.apache.hadoop.fs.s3a.S3AFileSystem.rename(S3AFileSystem.java:1119) + at org.apache.hadoop.fs.s3a.ITestS3ARemoteFileChanged.lambda$testRenameEventuallyConsistentFile2$6(ITestS3ARemoteFileChanged.java:556) + at org.apache.hadoop.test.LambdaTestUtils.intercept(LambdaTestUtils.java:498) + at org.apache.hadoop.fs.s3a.ITestS3ARemoteFileChanged.testRenameEventuallyConsistentFile2(ITestS3ARemoteFileChanged.java:554) + at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) + at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) + at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) + at java.lang.reflect.Method.invoke(Method.java:498) + at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50) + at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) + at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47) + at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) + at 
org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
+ at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
+ at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
+ at org.junit.internal.runners.statements.FailOnTimeout$CallableStatement.call(FailOnTimeout.java:298)
+ at org.junit.internal.runners.statements.FailOnTimeout$CallableStatement.call(FailOnTimeout.java:292)
+ at java.util.concurrent.FutureTask.run(FutureTask.java:266)
+ at java.lang.Thread.run(Thread.java:748)
+Caused by: com.amazonaws.AmazonClientException: Unable to complete transfer: null
+ at com.amazonaws.services.s3.transfer.internal.AbstractTransfer.unwrapExecutionException(AbstractTransfer.java:286)
+ at com.amazonaws.services.s3.transfer.internal.AbstractTransfer.rethrowExecutionException(AbstractTransfer.java:265)
+ at com.amazonaws.services.s3.transfer.internal.CopyImpl.waitForCopyResult(CopyImpl.java:67)
+ at org.apache.hadoop.fs.s3a.impl.CopyOutcome.waitForCopy(CopyOutcome.java:72)
+ at org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$copyFile$14(S3AFileSystem.java:3047)
+ at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:109)
+ ... 25 more
+Caused by: java.lang.NullPointerException
+ at com.amazonaws.services.s3.transfer.internal.CopyCallable.copyInOneChunk(CopyCallable.java:154)
+ at com.amazonaws.services.s3.transfer.internal.CopyCallable.call(CopyCallable.java:134)
+ at com.amazonaws.services.s3.transfer.internal.CopyMonitor.call(CopyMonitor.java:132)
+ at com.amazonaws.services.s3.transfer.internal.CopyMonitor.call(CopyMonitor.java:43)
+ at java.util.concurrent.FutureTask.run(FutureTask.java:266)
+ at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
+ at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
+ ... 1 more
+```
+
 ## Other Topics

 For details on how to test S3Guard, see [Testing S3Guard](./testing.html#s3guard)
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
index 707694c8c6..f61b46a8da 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
@@ -298,6 +298,24 @@ plugin:
 ```bash
 mvn surefire-report:failsafe-report-only
 ```
+## Testing Versioned Stores
+
+Some tests (specifically some in `ITestS3ARemoteFileChanged`) require
+a versioned bucket for full test coverage, as well as S3Guard being enabled.
+
+To enable versioning in a bucket:
+
+1. In the AWS S3 Management console, find and select the bucket.
+1. In the "Properties" tab, set it as versioned.
+1. **Important**: create a lifecycle rule to automatically clean up old versions
+after 24h. This avoids running up bills for objects which test runs create and
+then delete.
+1. Run the tests again.
+
+Alternatively, versioning can be enabled from the AWS CLI with
+`aws s3api put-bucket-versioning --bucket <bucket> --versioning-configuration Status=Enabled`.
+
+Once a bucket is converted to being versioned, it cannot be converted back
+to being unversioned.
+
+
 ## Scale Tests

 There are a set of tests designed to measure the scalability and performance
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
index 3123221bd8..8cdac9e352 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
@@ -970,8 +970,8 @@ and the like.
The standard strategy here is to save to HDFS and then copy to S3. ``` org.apache.hadoop.fs.s3a.RemoteFileChangedException: re-open `s3a://my-bucket/test/file.txt': - ETag change reported by S3 while reading at position 1949. - Version f9c186d787d4de9657e99f280ba26555 was unavailable + Change reported by S3 while reading at position 1949. + ETag f9c186d787d4de9657e99f280ba26555 was unavailable at org.apache.hadoop.fs.s3a.impl.ChangeTracker.processResponse(ChangeTracker.java:137) at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:200) at org.apache.hadoop.fs.s3a.S3AInputStream.lambda$lazySeek$1(S3AInputStream.java:346) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java index 03c91e62ce..886795a9d9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java @@ -56,19 +56,26 @@ public abstract class AbstractS3AMockTest { @Before public void setup() throws Exception { + Configuration conf = createConfiguration(); + fs = new S3AFileSystem(); + URI uri = URI.create(FS_S3A + "://" + BUCKET); + fs.initialize(uri, conf); + s3 = fs.getAmazonS3ClientForTesting("mocking"); + } + + public Configuration createConfiguration() { Configuration conf = new Configuration(); conf.setClass(S3_CLIENT_FACTORY_IMPL, MockS3ClientFactory.class, S3ClientFactory.class); - // We explicitly disable MetadataStore even if it's configured. For unit + // We explicitly disable MetadataStore. For unit // test we don't issue request to AWS DynamoDB service. conf.setClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class, MetadataStore.class); // FS is always magic conf.setBoolean(CommitConstants.MAGIC_COMMITTER_ENABLED, true); - fs = new S3AFileSystem(); - URI uri = URI.create(FS_S3A + "://" + BUCKET); - fs.initialize(uri, conf); - s3 = fs.getAmazonS3ClientForTesting("mocking"); + // use minimum multipart size for faster triggering + conf.setLong(Constants.MULTIPART_SIZE, MULTIPART_MIN_SIZE); + return conf; } @After diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java index 7abd474976..870172ec3e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADelayedFNF.java @@ -22,7 +22,11 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.apache.hadoop.test.LambdaTestUtils; + +import org.junit.Assume; import org.junit.Test; import java.io.FileNotFoundException; @@ -43,6 +47,12 @@ public class ITestS3ADelayedFNF extends AbstractS3ATestBase { @Test public void testNotFoundFirstRead() throws Exception { FileSystem fs = getFileSystem(); + ChangeDetectionPolicy changeDetectionPolicy = + ((S3AFileSystem) fs).getChangeDetectionPolicy(); + Assume.assumeFalse("FNF not expected when using a bucket with" + + " object versioning", + changeDetectionPolicy.getSource() == Source.VersionId); + Path p = path("some-file"); ContractTestUtils.createFile(fs, p, false, new byte[] {20, 21, 22}); 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java index 6ac803e308..da671030c2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AInconsistency.java @@ -24,9 +24,13 @@ import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.contract.s3a.S3AContract; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore; import org.apache.hadoop.test.LambdaTestUtils; + +import org.junit.Assume; import org.junit.Test; import java.io.FileNotFoundException; @@ -106,6 +110,12 @@ public void testGetFileStatus() throws Exception { @Test public void testOpenDeleteRead() throws Exception { S3AFileSystem fs = getFileSystem(); + ChangeDetectionPolicy changeDetectionPolicy = + ((S3AFileSystem) fs).getChangeDetectionPolicy(); + Assume.assumeFalse("FNF not expected when using a bucket with" + + " object versioning", + changeDetectionPolicy.getSource() == Source.VersionId); + Path p = path("testOpenDeleteRead.txt"); writeTextFile(fs, p, "1337c0d3z", true); try (InputStream s = fs.open(p)) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java index 98dd2026f5..c345a1f9da 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARemoteFileChanged.java @@ -19,77 +19,258 @@ package org.apache.hadoop.fs.s3a; import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.charset.Charset; import java.util.Arrays; import java.util.Collection; +import java.util.Optional; +import com.amazonaws.AmazonClientException; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.model.CopyObjectRequest; +import com.amazonaws.services.s3.model.CopyObjectResult; +import com.amazonaws.services.s3.model.GetObjectMetadataRequest; +import com.amazonaws.services.s3.model.GetObjectRequest; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.S3Object; +import com.google.common.base.Charsets; import org.junit.Assume; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; +import org.mockito.ArgumentMatchers; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Mode; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; +import org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore; +import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; +import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore; +import 
org.apache.hadoop.fs.s3a.s3guard.PathMetadata; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.contract.ContractTestUtils.readUTF8; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides; +import static org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.CHANGE_DETECTED; +import static org.apache.hadoop.fs.s3a.select.SelectConstants.S3_SELECT_CAPABILITY; +import static org.apache.hadoop.fs.s3a.select.SelectConstants.SELECT_SQL; import static org.apache.hadoop.test.LambdaTestUtils.eventually; import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.when; /** * Test S3A remote file change detection. + * This is a very parameterized test; the first three parameters + * define configuration options for the tests, while the final one + * declares the expected outcomes given those options. + * + * This test uses mocking to insert transient failures into the S3 client, + * underneath the S3A Filesystem instance. + * + * This is used to simulate eventual consistency, so force the change policy + * failure modes to be encountered. + * + * If changes are made to the filesystem such that the number of calls to + * operations such as {@link S3AFileSystem#getObjectMetadata(Path)} are + * changed, the number of failures which the mock layer must generate may + * change. + * + * As the S3Guard auth mode flag does control whether or not a HEAD is issued + * in a call to {@code getFileStatus()}; the test parameter {@link #authMode} + * is used to help predict this count. + * + * Important: if you are seeing failures in this test after changing + * one of the rename/copy/open operations, it may be that an increase, + * decrease or change in the number of low-level S3 HEAD/GET operations is + * triggering the failures. + * Please review the changes to see that you haven't unintentionally done this. + * If it is intentional, please update the parameters here. + * + * If you are seeing failures without such a change, and nobody else is, + * it is likely that you have a different bucket configuration option which + * is somehow triggering a regression. If you can work out which option + * this is, then extend {@link #createConfiguration()} to reset that parameter + * too. + * + * Note: to help debug these issues, set the log for this to DEBUG: + *
+ * <pre>
+ *   log4j.logger.org.apache.hadoop.fs.s3a.ITestS3ARemoteFileChanged=DEBUG
+ * </pre>
    + * The debug information printed will include a trace of where operations + * are being called from, to help understand why the test is failing. */ @RunWith(Parameterized.class) public class ITestS3ARemoteFileChanged extends AbstractS3ATestBase { + private static final Logger LOG = LoggerFactory.getLogger(ITestS3ARemoteFileChanged.class); + private static final String TEST_DATA = "Some test data"; + + private static final byte[] TEST_DATA_BYTES = TEST_DATA.getBytes( + Charsets.UTF_8); + private static final int TEST_MAX_RETRIES = 5; + private static final String TEST_RETRY_INTERVAL = "10ms"; + private static final String QUOTED_TEST_DATA = + "\"" + TEST_DATA + "\""; + + private Optional originalS3Client = Optional.empty(); + + private enum InteractionType { + READ, + READ_AFTER_DELETE, + EVENTUALLY_CONSISTENT_READ, + COPY, + EVENTUALLY_CONSISTENT_COPY, + EVENTUALLY_CONSISTENT_METADATA, + SELECT, + EVENTUALLY_CONSISTENT_SELECT + } + private final String changeDetectionSource; private final String changeDetectionMode; - private final boolean expectChangeException; - private final boolean expectFileNotFoundException; + private final boolean authMode; + private final Collection expectedExceptionInteractions; + private S3AFileSystem fs; - @Parameterized.Parameters + /** + * Test parameters. + *
+ * <ol>
+ *   <li>Change detection source: etag or version.</li>
+ *   <li>Change detection policy: server, client, client+warn, none.</li>
+ *   <li>Whether to enable auth mode on the filesystem.</li>
+ *   <li>Expected outcomes.</li>
+ * </ol>
    + * @return the test configuration. + */ + @Parameterized.Parameters(name = "{0}-{1}-auth-{2}") public static Collection params() { return Arrays.asList(new Object[][]{ // make sure it works with invalid config - {"bogus", "bogus", true, true}, + {"bogus", "bogus", + true, + Arrays.asList( + InteractionType.READ, + InteractionType.READ_AFTER_DELETE, + InteractionType.EVENTUALLY_CONSISTENT_READ, + InteractionType.COPY, + InteractionType.EVENTUALLY_CONSISTENT_COPY, + InteractionType.EVENTUALLY_CONSISTENT_METADATA, + InteractionType.SELECT, + InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, // test with etag - {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_SERVER, true, true}, - {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_CLIENT, true, true}, - {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_WARN, false, true}, - {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_NONE, false, true}, + {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_SERVER, + true, + Arrays.asList( + InteractionType.READ, + InteractionType.READ_AFTER_DELETE, + InteractionType.EVENTUALLY_CONSISTENT_READ, + InteractionType.COPY, + InteractionType.EVENTUALLY_CONSISTENT_COPY, + InteractionType.EVENTUALLY_CONSISTENT_METADATA, + InteractionType.SELECT, + InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, + {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_CLIENT, + false, + Arrays.asList( + InteractionType.READ, + InteractionType.EVENTUALLY_CONSISTENT_READ, + InteractionType.READ_AFTER_DELETE, + InteractionType.COPY, + // not InteractionType.EVENTUALLY_CONSISTENT_COPY as copy change + // detection can't really occur client-side. The eTag of + // the new object can't be expected to match. + InteractionType.EVENTUALLY_CONSISTENT_METADATA, + InteractionType.SELECT, + InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, + {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_WARN, + false, + Arrays.asList( + InteractionType.READ_AFTER_DELETE)}, + {CHANGE_DETECT_SOURCE_ETAG, CHANGE_DETECT_MODE_NONE, + false, + Arrays.asList( + InteractionType.READ_AFTER_DELETE)}, // test with versionId - // when using server-side versionId, the exceptions shouldn't happen - // since the previous version will still be available - {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_SERVER, false, - false}, + // when using server-side versionId, the exceptions + // shouldn't happen since the previous version will still be available + {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_SERVER, + true, + Arrays.asList( + InteractionType.EVENTUALLY_CONSISTENT_READ, + InteractionType.EVENTUALLY_CONSISTENT_COPY, + InteractionType.EVENTUALLY_CONSISTENT_METADATA, + InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, // with client-side versionId it will behave similar to client-side eTag - {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_CLIENT, true, - true}, + {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_CLIENT, + false, + Arrays.asList( + InteractionType.READ, + InteractionType.READ_AFTER_DELETE, + InteractionType.EVENTUALLY_CONSISTENT_READ, + InteractionType.COPY, + // not InteractionType.EVENTUALLY_CONSISTENT_COPY as copy change + // detection can't really occur client-side. The versionId of + // the new object can't be expected to match. 
+ InteractionType.EVENTUALLY_CONSISTENT_METADATA, + InteractionType.SELECT, + InteractionType.EVENTUALLY_CONSISTENT_SELECT)}, - {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_WARN, false, true}, - {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_NONE, false, true} + {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_WARN, + true, + Arrays.asList( + InteractionType.READ_AFTER_DELETE)}, + {CHANGE_DETECT_SOURCE_VERSION_ID, CHANGE_DETECT_MODE_NONE, + false, + Arrays.asList( + InteractionType.READ_AFTER_DELETE)} }); } public ITestS3ARemoteFileChanged(String changeDetectionSource, String changeDetectionMode, - boolean expectException, - boolean expectFileNotFoundException) { + boolean authMode, + Collection expectedExceptionInteractions) { this.changeDetectionSource = changeDetectionSource; this.changeDetectionMode = changeDetectionMode; - this.expectChangeException = expectException; - this.expectFileNotFoundException = expectFileNotFoundException; + this.authMode = authMode; + this.expectedExceptionInteractions = expectedExceptionInteractions; + } + + @Override + public void setup() throws Exception { + super.setup(); + // skip all versioned checks if the remote FS doesn't do + // versions. + fs = getFileSystem(); + skipIfVersionPolicyAndNoVersionId(); + // cache the original S3 client for teardown. + originalS3Client = Optional.of( + fs.getAmazonS3ClientForTesting("caching")); + } + + @Override + public void teardown() throws Exception { + // restore the s3 client so there's no mocking interfering with the teardown + originalS3Client.ifPresent(fs::setAmazonS3Client); + super.teardown(); } @Override @@ -98,33 +279,65 @@ protected Configuration createConfiguration() { String bucketName = getTestBucketName(conf); removeBucketOverrides(bucketName, conf, CHANGE_DETECT_SOURCE, - CHANGE_DETECT_MODE); + CHANGE_DETECT_MODE, + RETRY_LIMIT, + RETRY_INTERVAL, + METADATASTORE_AUTHORITATIVE); conf.set(CHANGE_DETECT_SOURCE, changeDetectionSource); conf.set(CHANGE_DETECT_MODE, changeDetectionMode); + conf.setBoolean(METADATASTORE_AUTHORITATIVE, authMode); + + // reduce retry limit so FileNotFoundException cases timeout faster, + // speeding up the tests + conf.setInt(RETRY_LIMIT, TEST_MAX_RETRIES); + conf.set(RETRY_INTERVAL, TEST_RETRY_INTERVAL); + + if (conf.getClass(S3_METADATA_STORE_IMPL, MetadataStore.class) == + NullMetadataStore.class) { + LOG.debug("Enabling local S3Guard metadata store"); + // favor LocalMetadataStore over NullMetadataStore + conf.setClass(S3_METADATA_STORE_IMPL, + LocalMetadataStore.class, MetadataStore.class); + } S3ATestUtils.disableFilesystemCaching(conf); return conf; } + /** + * Get the path of this method, including parameterized values. + * @return a path unique to this method and parameters + * @throws IOException failure. + */ + protected Path path() throws IOException { + return super.path(getMethodName()); + } + + /** + * How many HEAD requests are made in a call to + * {@link S3AFileSystem#getFileStatus(Path)}? + * @return a number >= 0. + */ + private int getFileStatusHeadCount() { + return authMode ? 0 : 1; + } + + /** + * Tests reading a file that is changed while the reader's InputStream is + * open. 
+ */ @Test - public void testReadFileChanged() throws Throwable { + public void testReadFileChangedStreamOpen() throws Throwable { + describe("Tests reading a file that is changed while the reader's " + + "InputStream is open."); final int originalLength = 8192; final byte[] originalDataset = dataset(originalLength, 'a', 32); final int newLength = originalLength + 1; final byte[] newDataset = dataset(newLength, 'A', 32); - final S3AFileSystem fs = getFileSystem(); final Path testpath = path("readFileToChange.txt"); // initial write writeDataset(fs, testpath, originalDataset, originalDataset.length, 1024, false); - if (fs.getChangeDetectionPolicy().getSource() == Source.VersionId) { - // skip versionId tests if the bucket doesn't have object versioning - // enabled - Assume.assumeTrue( - "Target filesystem does not support versioning", - fs.getObjectMetadata(fs.pathToKey(testpath)).getVersionId() != null); - } - try(FSDataInputStream instream = fs.open(testpath)) { // seek forward and read successfully instream.seek(1024); @@ -152,9 +365,8 @@ public void testReadFileChanged() throws Throwable { // now check seek backward instream.seek(instream.getPos() - 100); - if (expectChangeException) { - intercept(RemoteFileChangedException.class, "", "read", - () -> instream.read()); + if (expectedExceptionInteractions.contains(InteractionType.READ)) { + expectReadFailure(instream); } else { instream.read(); } @@ -164,9 +376,8 @@ public void testReadFileChanged() throws Throwable { // seek backward instream.seek(0); - if (expectChangeException) { - intercept(RemoteFileChangedException.class, "", "read", - () -> instream.read(buf)); + if (expectedExceptionInteractions.contains(InteractionType.READ)) { + expectReadFailure(instream); intercept(RemoteFileChangedException.class, "", "read", () -> instream.read(0, buf, 0, buf.length)); intercept(RemoteFileChangedException.class, "", "readfully", @@ -183,7 +394,8 @@ public void testReadFileChanged() throws Throwable { // seek backward instream.seek(0); - if (expectFileNotFoundException) { + if (expectedExceptionInteractions.contains( + InteractionType.READ_AFTER_DELETE)) { intercept(FileNotFoundException.class, "", "read()", () -> instream.read()); intercept(FileNotFoundException.class, "", "readfully", @@ -194,4 +406,890 @@ public void testReadFileChanged() throws Throwable { } } } + + /** + * Tests reading a file where the version visible in S3 does not match the + * version tracked in the metadata store. + */ + @Test + public void testReadFileChangedOutOfSyncMetadata() throws Throwable { + final Path testpath = writeOutOfSyncFileVersion("fileChangedOutOfSync.dat"); + + try (FSDataInputStream instream = fs.open(testpath)) { + if (expectedExceptionInteractions.contains(InteractionType.READ)) { + expectReadFailure(instream); + } else { + instream.read(); + } + } + } + + /** + * Ensures a file can be read when there is no version metadata + * (ETag, versionId). + */ + @Test + public void testReadWithNoVersionMetadata() throws Throwable { + final Path testpath = writeFileWithNoVersionMetadata("readnoversion.dat"); + + assertEquals("Contents of " + testpath, + TEST_DATA, + readUTF8(fs, testpath, -1)); + } + + /** + * Tests using S3 Select on a file where the version visible in S3 does not + * match the version tracked in the metadata store. 
+ */ + @Test + public void testSelectChangedFile() throws Throwable { + requireS3Select(); + final Path testpath = writeOutOfSyncFileVersion("select.dat"); + + if (expectedExceptionInteractions.contains(InteractionType.SELECT)) { + interceptFuture(RemoteFileChangedException.class, "select", + fs.openFile(testpath) + .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build()); + } else { + fs.openFile(testpath) + .must(SELECT_SQL, "SELECT * FROM S3OBJECT") + .build() + .get() + .close(); + } + } + + /** + * Tests using S3 Select on a file where the version visible in S3 does not + * initially match the version tracked in the metadata store, but eventually + * (after retries) does. + */ + @Test + public void testSelectEventuallyConsistentFile() throws Throwable { + describe("Eventually Consistent S3 Select"); + requireS3Guard(); + requireS3Select(); + AmazonS3 s3ClientSpy = spyOnFilesystem(); + + final Path testpath1 = writeEventuallyConsistentFileVersion( + "select1.dat", s3ClientSpy, 0, TEST_MAX_RETRIES, 0); + + // should succeed since the inconsistency doesn't last longer than the + // configured retry limit + fs.openFile(testpath1) + .must(SELECT_SQL, "SELECT * FROM S3OBJECT") + .build() + .get() + .close(); + + // select() makes a getFileStatus() call before the consistency checking + // that will match the stub. As such, we need an extra inconsistency here + // to cross the threshold + int getMetadataInconsistencyCount = TEST_MAX_RETRIES + 2; + final Path testpath2 = writeEventuallyConsistentFileVersion( + "select2.dat", s3ClientSpy, 0, getMetadataInconsistencyCount, 0); + + if (expectedExceptionInteractions.contains( + InteractionType.EVENTUALLY_CONSISTENT_SELECT)) { + // should fail since the inconsistency lasts longer than the configured + // retry limit + interceptFuture(RemoteFileChangedException.class, "select", + fs.openFile(testpath2) + .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build()); + } else { + fs.openFile(testpath2) + .must(SELECT_SQL, "SELECT * FROM S3OBJECT") + .build() + .get() + .close(); + } + } + + /** + * Ensures a file can be read via S3 Select when there is no version metadata + * (ETag, versionId). + */ + @Test + public void testSelectWithNoVersionMetadata() throws Throwable { + requireS3Select(); + final Path testpath = + writeFileWithNoVersionMetadata("selectnoversion.dat"); + + try (FSDataInputStream instream = fs.openFile(testpath) + .must(SELECT_SQL, "SELECT * FROM S3OBJECT").build().get()) { + assertEquals(QUOTED_TEST_DATA, + IOUtils.toString(instream, Charset.forName("UTF-8")).trim()); + } + } + + /** + * Tests doing a rename() on a file where the version visible in S3 does not + * match the version tracked in the metadata store. + * @throws Throwable failure + */ + @Test + public void testRenameChangedFile() throws Throwable { + final Path testpath = writeOutOfSyncFileVersion("rename.dat"); + + final Path dest = path("dest.dat"); + if (expectedExceptionInteractions.contains(InteractionType.COPY)) { + intercept(RemoteFileChangedException.class, "", + "expected copy() failure", + () -> fs.rename(testpath, dest)); + } else { + fs.rename(testpath, dest); + } + } + + /** + * Inconsistent response counts for getObjectMetadata() and + * copyObject() for a rename. 
+ * @param metadataCallsExpectedBeforeRetryLoop number of getObjectMetadata + * calls expected before the consistency checking retry loop + * @return the inconsistencies for (metadata, copy) + */ + private Pair renameInconsistencyCounts( + int metadataCallsExpectedBeforeRetryLoop) { + int metadataInconsistencyCount = TEST_MAX_RETRIES + + metadataCallsExpectedBeforeRetryLoop; + int copyInconsistencyCount = + versionCheckingIsOnServer() ? TEST_MAX_RETRIES : 0; + + return Pair.of(metadataInconsistencyCount, copyInconsistencyCount); + } + + /** + * Tests doing a rename() on a file where the version visible in S3 does not + * match the version in the metadata store until a certain number of retries + * has been met. + */ + @Test + public void testRenameEventuallyConsistentFile() throws Throwable { + requireS3Guard(); + AmazonS3 s3ClientSpy = spyOnFilesystem(); + + // Total inconsistent response count across getObjectMetadata() and + // copyObject(). + // The split of inconsistent responses between getObjectMetadata() and + // copyObject() is arbitrary. + Pair counts = renameInconsistencyCounts( + getFileStatusHeadCount()); + int metadataInconsistencyCount = counts.getLeft(); + int copyInconsistencyCount = counts.getRight(); + final Path testpath1 = + writeEventuallyConsistentFileVersion("rename-eventually1.dat", + s3ClientSpy, + 0, + metadataInconsistencyCount, + copyInconsistencyCount); + + final Path dest1 = path("dest1.dat"); + // shouldn't fail since the inconsistency doesn't last through the + // configured retry limit + fs.rename(testpath1, dest1); + } + + /** + * Tests doing a rename() on a file where the version visible in S3 does not + * match the version in the metadata store until a certain number of retries + * has been met. + * The test expects failure by AWSClientIOException caused by NPE due to + * https://github.com/aws/aws-sdk-java/issues/1644 + */ + @Test + public void testRenameEventuallyConsistentFileNPE() throws Throwable { + requireS3Guard(); + skipIfVersionPolicyAndNoVersionId(); + AmazonS3 s3ClientSpy = spyOnFilesystem(); + + Pair counts = renameInconsistencyCounts( + getFileStatusHeadCount()); + int metadataInconsistencyCount = counts.getLeft(); + int copyInconsistencyCount = counts.getRight(); + // giving copyInconsistencyCount + 1 here should trigger the failure, + // exceeding the retry limit + final Path testpath2 = + writeEventuallyConsistentFileVersion("rename-eventuallyNPE.dat", + s3ClientSpy, + 0, + metadataInconsistencyCount, + copyInconsistencyCount + 1); + final Path dest2 = path("destNPE.dat"); + if (expectedExceptionInteractions.contains( + InteractionType.EVENTUALLY_CONSISTENT_COPY)) { + // should fail since the inconsistency is set up to persist longer than + // the configured retry limit + // the expected exception is not RemoteFileChangedException due to + // https://github.com/aws/aws-sdk-java/issues/1644 + // If this test is failing after an AWS SDK update, + // then it means the SDK bug is fixed. + // Please update this test to match the new behavior. + AWSClientIOException exception = + intercept(AWSClientIOException.class, + "Unable to complete transfer: null", + "expected copy() failure", + () -> fs.rename(testpath2, dest2)); + AmazonClientException cause = exception.getCause(); + if (cause == null) { + // no cause; something else went wrong: throw. 
+        throw new AssertionError("No inner cause",
+            exception);
+      }
+      Throwable causeCause = cause.getCause();
+      if (!(causeCause instanceof NullPointerException)) {
+        // null causeCause or it is the wrong type: throw
+        throw new AssertionError("Innermost cause is not NPE",
+            exception);
+      }
+    } else {
+      fs.rename(testpath2, dest2);
+    }
+  }
+
+  /**
+   * Tests doing a rename() on a file where the version visible in S3 does not
+   * match the version in the metadata store until a certain number of retries
+   * has been met.
+   * The test expects failure by RemoteFileChangedException.
+   */
+  @Test
+  public void testRenameEventuallyConsistentFileRFCE() throws Throwable {
+    requireS3Guard();
+    skipIfVersionPolicyAndNoVersionId();
+    AmazonS3 s3ClientSpy = spyOnFilesystem();
+
+    Pair<Integer, Integer> counts = renameInconsistencyCounts(
+        getFileStatusHeadCount());
+    int metadataInconsistencyCount = counts.getLeft();
+    int copyInconsistencyCount = counts.getRight();
+    // giving metadataInconsistencyCount + 1 here should trigger the failure,
+    // exceeding the retry limit
+    final Path testpath2 =
+        writeEventuallyConsistentFileVersion("rename-eventuallyRFCE.dat",
+            s3ClientSpy,
+            0,
+            metadataInconsistencyCount + 1,
+            copyInconsistencyCount);
+    final Path dest2 = path("destRFCE.dat");
+    if (expectedExceptionInteractions.contains(
+        InteractionType.EVENTUALLY_CONSISTENT_METADATA)) {
+      // should fail since the inconsistency is set up to persist longer than
+      // the configured retry limit
+      intercept(RemoteFileChangedException.class,
+          CHANGE_DETECTED,
+          "expected copy() failure",
+          () -> fs.rename(testpath2, dest2));
+    } else {
+      fs.rename(testpath2, dest2);
+    }
+  }
+
+  /**
+   * Tests doing a rename() on a directory containing
+   * a file which is eventually consistent.
+   * There is no call to getFileStatus on the source file whose
+   * inconsistency is simulated; the state of S3Guard auth mode is not
+   * relevant.
+   */
+  @Test
+  public void testRenameEventuallyConsistentDirectory() throws Throwable {
+    requireS3Guard();
+    AmazonS3 s3ClientSpy = spyOnFilesystem();
+    Path basedir = path();
+    Path sourcedir = new Path(basedir, "sourcedir");
+    fs.mkdirs(sourcedir);
+    Path destdir = new Path(basedir, "destdir");
+    String inconsistent = "inconsistent";
+    String consistent = "consistent";
+    Path inconsistentFile = new Path(sourcedir, inconsistent);
+    Path consistentFile = new Path(sourcedir, consistent);
+
+    // write the consistent data
+    writeDataset(fs, consistentFile, TEST_DATA_BYTES, TEST_DATA_BYTES.length,
+        1024, true, true);
+
+    Pair<Integer, Integer> counts = renameInconsistencyCounts(0);
+    int metadataInconsistencyCount = counts.getLeft();
+    int copyInconsistencyCount = counts.getRight();
+
+    writeEventuallyConsistentData(
+        s3ClientSpy,
+        inconsistentFile,
+        TEST_DATA_BYTES,
+        0,
+        metadataInconsistencyCount,
+        copyInconsistencyCount);
+
+    // must not fail since the inconsistency doesn't last through the
+    // configured retry limit
+    fs.rename(sourcedir, destdir);
+  }
+
+  /**
+   * Ensures a file can be renamed when there is no version metadata
+   * (ETag, versionId).
+   */
+  @Test
+  public void testRenameWithNoVersionMetadata() throws Throwable {
+    final Path testpath =
+        writeFileWithNoVersionMetadata("renamenoversion.dat");
+
+    final Path dest = path("noversiondest.dat");
+    fs.rename(testpath, dest);
+    assertEquals("Contents of " + dest,
+        TEST_DATA,
+        readUTF8(fs, dest, -1));
+  }
+
+  /**
+   * Ensures S3Guard and retries allow an eventually consistent read.
+ */ + @Test + public void testReadAfterEventuallyConsistentWrite() throws Throwable { + requireS3Guard(); + AmazonS3 s3ClientSpy = spyOnFilesystem(); + final Path testpath1 = + writeEventuallyConsistentFileVersion("eventually1.dat", + s3ClientSpy, TEST_MAX_RETRIES, 0 , 0); + + try (FSDataInputStream instream1 = fs.open(testpath1)) { + // succeeds on the last retry + instream1.read(); + } + } + + /** + * Ensures S3Guard and retries allow an eventually consistent read. + */ + @Test + public void testReadAfterEventuallyConsistentWrite2() throws Throwable { + requireS3Guard(); + AmazonS3 s3ClientSpy = spyOnFilesystem(); + final Path testpath2 = + writeEventuallyConsistentFileVersion("eventually2.dat", + s3ClientSpy, TEST_MAX_RETRIES + 1, 0, 0); + + try (FSDataInputStream instream2 = fs.open(testpath2)) { + if (expectedExceptionInteractions.contains( + InteractionType.EVENTUALLY_CONSISTENT_READ)) { + // keeps retrying and eventually gives up with RemoteFileChangedException + expectReadFailure(instream2); + } else { + instream2.read(); + } + } + } + + /** + * Ensures read on re-open (after seek backwards) when S3 does not return the + * version of the file tracked in the metadata store fails immediately. No + * retries should happen since a retry is not expected to recover. + */ + @Test + public void testEventuallyConsistentReadOnReopen() throws Throwable { + requireS3Guard(); + AmazonS3 s3ClientSpy = spyOnFilesystem(); + String filename = "eventually-reopen.dat"; + final Path testpath = + writeEventuallyConsistentFileVersion(filename, + s3ClientSpy, 0, 0, 0); + + try (FSDataInputStream instream = fs.open(testpath)) { + instream.read(); + // overwrite the file, returning inconsistent version for + // (effectively) infinite retries + writeEventuallyConsistentFileVersion(filename, s3ClientSpy, + Integer.MAX_VALUE, 0, 0); + instream.seek(0); + if (expectedExceptionInteractions.contains(InteractionType.READ)) { + // if it retries at all, it will retry forever, which should fail + // the test. The expected behavior is immediate + // RemoteFileChangedException. + expectReadFailure(instream); + } else { + instream.read(); + } + } + } + + /** + * Writes a file with old ETag and versionId in the metadata store such + * that the metadata is out of sync with S3. Attempts to read such a file + * should result in {@link RemoteFileChangedException}. + */ + private Path writeOutOfSyncFileVersion(String filename) throws IOException { + final Path testpath = path(filename); + final byte[] dataset = TEST_DATA_BYTES; + writeDataset(fs, testpath, dataset, dataset.length, + 1024, false); + S3AFileStatus originalStatus = (S3AFileStatus) fs.getFileStatus(testpath); + + // overwrite with half the content + writeDataset(fs, testpath, dataset, dataset.length / 2, + 1024, true); + + S3AFileStatus newStatus = (S3AFileStatus) fs.getFileStatus(testpath); + + // put back the original etag, versionId + S3AFileStatus forgedStatus = + S3AFileStatus.fromFileStatus(newStatus, Tristate.FALSE, + originalStatus.getETag(), originalStatus.getVersionId()); + fs.getMetadataStore().put( + new PathMetadata(forgedStatus, Tristate.FALSE, false)); + + return testpath; + } + + /** + * Writes {@link #TEST_DATA} to a file where the file will be inconsistent + * in S3 for a set of operations. + * The duration of the inconsistency is controlled by the + * getObjectInconsistencyCount, getMetadataInconsistencyCount, and + * copyInconsistentCallCount parameters. 
+ * The inconsistency manifests in AmazonS3#getObject, + * AmazonS3#getObjectMetadata, and AmazonS3#copyObject. + * This method sets up the provided s3ClientSpy to return a response to each + * of these methods indicating an inconsistency where the requested object + * version (eTag or versionId) is not available until a certain retry + * threshold is met. + * Providing inconsistent call count values above or + * below the overall retry limit allows a test to simulate a condition that + * either should or should not result in an overall failure from retry + * exhaustion. + * @param filename name of file (will be under test path) + * @param s3ClientSpy s3 client to patch + * @param getObjectInconsistencyCount number of GET inconsistencies + * @param getMetadataInconsistencyCount number of HEAD inconsistencies + * @param copyInconsistencyCount number of COPY inconsistencies. + * @return the path written + * @throws IOException failure to write the test data. + */ + private Path writeEventuallyConsistentFileVersion(String filename, + AmazonS3 s3ClientSpy, + int getObjectInconsistencyCount, + int getMetadataInconsistencyCount, + int copyInconsistencyCount) + throws IOException { + return writeEventuallyConsistentData(s3ClientSpy, + path(filename), + TEST_DATA_BYTES, + getObjectInconsistencyCount, + getMetadataInconsistencyCount, + copyInconsistencyCount); + } + + /** + * Writes data to a path and configures the S3 client for inconsistent + * HEAD, GET or COPY operations. + * @param testpath absolute path of file + * @param s3ClientSpy s3 client to patch + * @param dataset bytes to write. + * @param getObjectInconsistencyCount number of GET inconsistencies + * @param getMetadataInconsistencyCount number of HEAD inconsistencies + * @param copyInconsistencyCount number of COPY inconsistencies. + * @return the path written + * @throws IOException failure to write the test data. + */ + private Path writeEventuallyConsistentData(final AmazonS3 s3ClientSpy, + final Path testpath, + final byte[] dataset, + final int getObjectInconsistencyCount, + final int getMetadataInconsistencyCount, + final int copyInconsistencyCount) + throws IOException { + writeDataset(fs, testpath, dataset, dataset.length, + 1024, true); + S3AFileStatus originalStatus = (S3AFileStatus) fs.getFileStatus(testpath); + + // overwrite with half the content + writeDataset(fs, testpath, dataset, dataset.length / 2, + 1024, true); + + LOG.debug("Original file info: {}: version={}, etag={}", testpath, + originalStatus.getVersionId(), originalStatus.getETag()); + + S3AFileStatus newStatus = (S3AFileStatus) fs.getFileStatus(testpath); + LOG.debug("Updated file info: {}: version={}, etag={}", testpath, + newStatus.getVersionId(), newStatus.getETag()); + + stubTemporaryUnavailable(s3ClientSpy, getObjectInconsistencyCount, + testpath, newStatus); + + stubTemporaryWrongVersion(s3ClientSpy, getObjectInconsistencyCount, + testpath, originalStatus); + + if (versionCheckingIsOnServer()) { + // only stub inconsistency when mode is server since no constraints that + // should trigger inconsistency are passed in any other mode + stubTemporaryCopyInconsistency(s3ClientSpy, testpath, newStatus, + copyInconsistencyCount); + } + + stubTemporaryMetadataInconsistency(s3ClientSpy, testpath, originalStatus, + newStatus, getMetadataInconsistencyCount); + + return testpath; + } + + /** + * Log the call hierarchy at debug level, helps track down + * where calls to operations are coming from. 
+ */ + private void logLocationAtDebug() { + if (LOG.isDebugEnabled()) { + LOG.debug("Call hierarchy", new Exception("here")); + } + } + + /** + * Stubs {@link AmazonS3#getObject(GetObjectRequest)} + * within s3ClientSpy to return null until inconsistentCallCount calls have + * been made. The null response simulates what occurs when an object + * matching the specified ETag or versionId is not available. + * @param s3ClientSpy the spy to stub + * @param inconsistentCallCount the number of calls that should return the + * null response + * @param testpath the path of the object the stub should apply to + */ + private void stubTemporaryUnavailable(AmazonS3 s3ClientSpy, + int inconsistentCallCount, Path testpath, + S3AFileStatus newStatus) { + Answer temporarilyUnavailableAnswer = new Answer() { + private int callCount = 0; + + @Override + public S3Object answer(InvocationOnMock invocation) throws Throwable { + // simulates ETag or versionId constraint not met until + // inconsistentCallCount surpassed + callCount++; + if (callCount <= inconsistentCallCount) { + LOG.info("Temporarily unavailable {} count {} of {}", + testpath, callCount, inconsistentCallCount); + logLocationAtDebug(); + return null; + } + return (S3Object) invocation.callRealMethod(); + } + }; + + // match the requests that would be made in either server-side change + // detection mode + doAnswer(temporarilyUnavailableAnswer).when(s3ClientSpy) + .getObject( + matchingGetObjectRequest( + testpath, newStatus.getETag(), null)); + doAnswer(temporarilyUnavailableAnswer).when(s3ClientSpy) + .getObject( + matchingGetObjectRequest( + testpath, null, newStatus.getVersionId())); + } + + /** + * Stubs {@link AmazonS3#getObject(GetObjectRequest)} + * within s3ClientSpy to return an object modified to contain metadata + * from originalStatus until inconsistentCallCount calls have been made. 
+ * @param s3ClientSpy the spy to stub + * @param testpath the path of the object the stub should apply to + * @param inconsistentCallCount the number of calls that should return the + * null response + * @param originalStatus the status metadata to inject into the + * inconsistentCallCount responses + */ + private void stubTemporaryWrongVersion(AmazonS3 s3ClientSpy, + int inconsistentCallCount, Path testpath, + S3AFileStatus originalStatus) { + Answer temporarilyWrongVersionAnswer = new Answer() { + private int callCount = 0; + + @Override + public S3Object answer(InvocationOnMock invocation) throws Throwable { + // simulates old ETag or versionId until inconsistentCallCount surpassed + callCount++; + S3Object s3Object = (S3Object) invocation.callRealMethod(); + if (callCount <= inconsistentCallCount) { + LOG.info("Temporary Wrong Version {} count {} of {}", + testpath, callCount, inconsistentCallCount); + logLocationAtDebug(); + S3Object objectSpy = Mockito.spy(s3Object); + ObjectMetadata metadataSpy = + Mockito.spy(s3Object.getObjectMetadata()); + when(objectSpy.getObjectMetadata()).thenReturn(metadataSpy); + when(metadataSpy.getETag()).thenReturn(originalStatus.getETag()); + when(metadataSpy.getVersionId()) + .thenReturn(originalStatus.getVersionId()); + return objectSpy; + } + return s3Object; + } + }; + + // match requests that would be made in client-side change detection + doAnswer(temporarilyWrongVersionAnswer).when(s3ClientSpy).getObject( + matchingGetObjectRequest(testpath, null, null)); + } + + /** + * Stubs {@link AmazonS3#copyObject(CopyObjectRequest)} + * within s3ClientSpy to return null (indicating preconditions not met) until + * copyInconsistentCallCount calls have been made. + * @param s3ClientSpy the spy to stub + * @param testpath the path of the object the stub should apply to + * @param newStatus the status metadata containing the ETag and versionId + * that should be matched in order for the stub to apply + * @param copyInconsistentCallCount how many times to return the + * precondition failed error + */ + private void stubTemporaryCopyInconsistency(AmazonS3 s3ClientSpy, + Path testpath, S3AFileStatus newStatus, + int copyInconsistentCallCount) { + Answer temporarilyPreconditionsNotMetAnswer = + new Answer() { + private int callCount = 0; + + @Override + public CopyObjectResult answer(InvocationOnMock invocation) + throws Throwable { + callCount++; + if (callCount <= copyInconsistentCallCount) { + String message = "preconditions not met on call " + callCount + + " of " + copyInconsistentCallCount; + LOG.info("Copying {}: {}", testpath, message); + logLocationAtDebug(); + return null; + } + return (CopyObjectResult) invocation.callRealMethod(); + } + }; + + // match requests made during copy + doAnswer(temporarilyPreconditionsNotMetAnswer).when(s3ClientSpy).copyObject( + matchingCopyObjectRequest(testpath, newStatus.getETag(), null)); + doAnswer(temporarilyPreconditionsNotMetAnswer).when(s3ClientSpy).copyObject( + matchingCopyObjectRequest(testpath, null, newStatus.getVersionId())); + } + + /** + * Stubs {@link AmazonS3#getObjectMetadata(GetObjectMetadataRequest)} + * within s3ClientSpy to return metadata from originalStatus until + * metadataInconsistentCallCount calls have been made. 
+ * @param s3ClientSpy the spy to stub + * @param testpath the path of the object the stub should apply to + * @param originalStatus the inconsistent status metadata to return + * @param newStatus the status metadata to return after + * metadataInconsistentCallCount is met + * @param metadataInconsistentCallCount how many times to return the + * inconsistent metadata + */ + private void stubTemporaryMetadataInconsistency(AmazonS3 s3ClientSpy, + Path testpath, S3AFileStatus originalStatus, + S3AFileStatus newStatus, int metadataInconsistentCallCount) { + Answer temporarilyOldMetadataAnswer = + new Answer() { + private int callCount = 0; + + @Override + public ObjectMetadata answer(InvocationOnMock invocation) + throws Throwable { + ObjectMetadata objectMetadata = + (ObjectMetadata) invocation.callRealMethod(); + callCount++; + if (callCount <= metadataInconsistentCallCount) { + LOG.info("Inconsistent metadata {} count {} of {}", + testpath, callCount, metadataInconsistentCallCount); + logLocationAtDebug(); + ObjectMetadata metadataSpy = + Mockito.spy(objectMetadata); + when(metadataSpy.getETag()).thenReturn(originalStatus.getETag()); + when(metadataSpy.getVersionId()) + .thenReturn(originalStatus.getVersionId()); + return metadataSpy; + } + return objectMetadata; + } + }; + + // match requests made during select + doAnswer(temporarilyOldMetadataAnswer).when(s3ClientSpy).getObjectMetadata( + matchingMetadataRequest(testpath, null)); + doAnswer(temporarilyOldMetadataAnswer).when(s3ClientSpy).getObjectMetadata( + matchingMetadataRequest(testpath, newStatus.getVersionId())); + } + + /** + * Writes a file with null ETag and versionId in the metadata store. + */ + private Path writeFileWithNoVersionMetadata(String filename) + throws IOException { + final Path testpath = path(filename); + writeDataset(fs, testpath, TEST_DATA_BYTES, TEST_DATA_BYTES.length, + 1024, false); + S3AFileStatus originalStatus = (S3AFileStatus) fs.getFileStatus(testpath); + + // remove ETag and versionId + S3AFileStatus newStatus = S3AFileStatus.fromFileStatus(originalStatus, + Tristate.FALSE, null, null); + fs.getMetadataStore().put(new PathMetadata(newStatus, Tristate.FALSE, + false)); + + return testpath; + } + + /** + * The test is invalid if the policy uses versionId but the bucket doesn't + * have versioning enabled. + * + * Tests the given file for a versionId to detect whether bucket versioning + * is enabled. + */ + private void skipIfVersionPolicyAndNoVersionId(Path testpath) + throws IOException { + if (fs.getChangeDetectionPolicy().getSource() == Source.VersionId) { + // skip versionId tests if the bucket doesn't have object versioning + // enabled + Assume.assumeTrue( + "Target filesystem does not support versioning", + fs.getObjectMetadata(fs.pathToKey(testpath)).getVersionId() != null); + } + } + + /** + * Like {@link #skipIfVersionPolicyAndNoVersionId(Path)} but generates a new + * file to test versionId against. 
+ */ + private void skipIfVersionPolicyAndNoVersionId() throws IOException { + if (fs.getChangeDetectionPolicy().getSource() == Source.VersionId) { + Path versionIdFeatureTestFile = path("versionIdTest"); + writeDataset(fs, versionIdFeatureTestFile, TEST_DATA_BYTES, + TEST_DATA_BYTES.length, 1024, true, true); + skipIfVersionPolicyAndNoVersionId(versionIdFeatureTestFile); + } + } + + private GetObjectRequest matchingGetObjectRequest(Path path, String eTag, + String versionId) { + return ArgumentMatchers.argThat(request -> { + if (request.getBucketName().equals(fs.getBucket()) + && request.getKey().equals(fs.pathToKey(path))) { + if (eTag == null && !request.getMatchingETagConstraints().isEmpty()) { + return false; + } + if (eTag != null && + !request.getMatchingETagConstraints().contains(eTag)) { + return false; + } + if (versionId == null && request.getVersionId() != null) { + return false; + } + if (versionId != null && !versionId.equals(request.getVersionId())) { + return false; + } + return true; + } + return false; + }); + } + + private CopyObjectRequest matchingCopyObjectRequest(Path path, String eTag, + String versionId) { + return ArgumentMatchers.argThat(request -> { + if (request.getSourceBucketName().equals(fs.getBucket()) + && request.getSourceKey().equals(fs.pathToKey(path))) { + if (eTag == null && !request.getMatchingETagConstraints().isEmpty()) { + return false; + } + if (eTag != null && + !request.getMatchingETagConstraints().contains(eTag)) { + return false; + } + if (versionId == null && request.getSourceVersionId() != null) { + return false; + } + if (versionId != null && + !versionId.equals(request.getSourceVersionId())) { + return false; + } + return true; + } + return false; + }); + } + + private GetObjectMetadataRequest matchingMetadataRequest(Path path, + String versionId) { + return ArgumentMatchers.argThat(request -> { + if (request.getBucketName().equals(fs.getBucket()) + && request.getKey().equals(fs.pathToKey(path))) { + if (versionId == null && request.getVersionId() != null) { + return false; + } + if (versionId != null && + !versionId.equals(request.getVersionId())) { + return false; + } + return true; + } + return false; + }); + } + + /** + * Skip a test case if it needs S3Guard and the filesystem does + * not have it. + */ + private void requireS3Guard() { + Assume.assumeTrue("S3Guard must be enabled", fs.hasMetadataStore()); + } + + /** + * Skip a test case if S3 Select is not supported on this store. + */ + private void requireS3Select() { + Assume.assumeTrue("S3 Select is not enabled", + getFileSystem().hasCapability(S3_SELECT_CAPABILITY)); + } + + /** + * Spy on the filesystem at the S3 client level. + * @return a mocked S3 client to which the test FS is bonded. + */ + private AmazonS3 spyOnFilesystem() { + AmazonS3 s3ClientSpy = Mockito.spy( + fs.getAmazonS3ClientForTesting("mocking")); + fs.setAmazonS3Client(s3ClientSpy); + return s3ClientSpy; + } + + /** + * Expect reading this stream to fail. + * @param instream input stream. + * @return the caught exception. + * @throws Exception an other exception + */ + + private RemoteFileChangedException expectReadFailure( + final FSDataInputStream instream) + throws Exception { + return intercept(RemoteFileChangedException.class, "", + "read() returned", + () -> readToText(instream.read())); + } + + /** + * Convert the result of a read to a text string for errors. + * @param r result of the read() call. + * @return a string for exception text. + */ + private String readToText(int r) { + return r < 32 + ? 
(String.format("%02d", r)) + : (String.format("%c", (char) r)); + } + + /** + * Is the version checking on the server? + * @return true if the server returns 412 errors. + */ + private boolean versionCheckingIsOnServer() { + return fs.getChangeDetectionPolicy().getMode() == Mode.Server; + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java index d3b3c21c92..c90dd7c633 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java @@ -30,6 +30,7 @@ import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.contract.s3a.S3AContract; +import com.google.common.collect.Lists; import org.junit.Assume; import org.junit.Test; @@ -253,7 +254,7 @@ public void testConsistentRenameAfterDelete() throws Exception { DEFAULT_DELAY_KEY_SUBSTRING))); try { - RemoteIterator old = fs.listFilesAndEmptyDirectories( + RemoteIterator old = fs.listFilesAndEmptyDirectories( path("a"), true); fail("Recently renamed dir should not be visible"); } catch(FileNotFoundException e) { @@ -557,6 +558,44 @@ public void testInconsistentS3ClientDeletes() throws Throwable { ); } + /** + * Tests that the file's eTag and versionId are preserved in recursive + * listings. + */ + @Test + public void testListingReturnsVersionMetadata() throws Throwable { + S3AFileSystem fs = getFileSystem(); + Assume.assumeTrue(fs.hasMetadataStore()); + + // write simple file + Path file = path("file1"); + try (FSDataOutputStream outputStream = fs.create(file)) { + outputStream.writeChars("hello"); + } + + // get individual file status + FileStatus[] fileStatuses = fs.listStatus(file); + assertEquals(1, fileStatuses.length); + S3AFileStatus status = (S3AFileStatus) fileStatuses[0]; + String eTag = status.getETag(); + String versionId = status.getVersionId(); + + // get status through recursive directory listing + RemoteIterator filesIterator = fs.listFiles( + file.getParent(), true); + List files = Lists.newArrayList(); + while (filesIterator.hasNext()) { + files.add(filesIterator.next()); + } + assertEquals(1, files.size()); + + // ensure eTag and versionId are preserved in directory listing + S3ALocatedFileStatus locatedFileStatus = + (S3ALocatedFileStatus) files.get(0); + assertEquals(eTag, locatedFileStatus.getETag()); + assertEquals(versionId, locatedFileStatus.getVersionId()); + } + /** * Assert that the two list sizes match; failure message includes the lists. 
* @param message text for the assertion diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardOutOfBandOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardOutOfBandOperations.java index adaf538c6c..6dbe6f91d4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardOutOfBandOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardOutOfBandOperations.java @@ -26,6 +26,7 @@ import java.util.UUID; import java.util.stream.Collectors; +import org.junit.Assume; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -37,6 +38,8 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata; import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; @@ -243,6 +246,12 @@ public void testLongerLengthOverwrite() throws Exception { @Test public void testOutOfBandDeletes() throws Exception { + ChangeDetectionPolicy changeDetectionPolicy = + ((S3AFileSystem) getFileSystem()).getChangeDetectionPolicy(); + Assume.assumeFalse("FNF not expected when using a bucket with" + + " object versioning", + changeDetectionPolicy.getSource() == Source.VersionId); + Path testFileName = path("OutOfBandDelete-" + UUID.randomUUID()); outOfBandDeletes(testFileName, authoritative); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java index 51ff299e7b..0e091a9e9c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java @@ -177,7 +177,7 @@ public boolean exists(Path f) throws IOException { } @Override - void finishedWrite(String key, long length) { + void finishedWrite(String key, long length, String eTag, String versionId) { } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index 260ecdd71d..977c30d0f0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -1042,30 +1042,25 @@ public static void verifyFileStatus(FileStatus status, * Verify the status entry of a directory matches that expected. * @param status status entry to check * @param replication replication factor - * @param modTime modified time - * @param accessTime access time * @param owner owner - * @param group user group - * @param permission permission. 
 */ - public static void verifyDirStatus(FileStatus status, + public static void verifyDirStatus(S3AFileStatus status, int replication, - long modTime, - long accessTime, - String owner, - String group, - FsPermission permission) { + String owner) { String details = status.toString(); assertTrue("Is a dir: " + details, status.isDirectory()); assertEquals("zero length: " + details, 0, status.getLen()); - - assertEquals("Mod time: " + details, modTime, status.getModificationTime()); + // S3AFileStatus always assigns modTime = System.currentTimeMillis() + assertTrue("Mod time: " + details, status.getModificationTime() > 0); assertEquals("Replication value: " + details, replication, status.getReplication()); - assertEquals("Access time: " + details, accessTime, status.getAccessTime()); + assertEquals("Access time: " + details, 0, status.getAccessTime()); assertEquals("Owner: " + details, owner, status.getOwner()); - assertEquals("Group: " + details, group, status.getGroup()); - assertEquals("Permission: " + details, permission, status.getPermission()); + // S3AFileStatus always assigns group=owner + assertEquals("Group: " + details, owner, status.getGroup()); + // S3AFileStatus always assigns permission = default + assertEquals("Permission: " + details, + FsPermission.getDefault(), status.getPermission()); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java index 39a5e3bd87..1a533bfe64 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestListing.java @@ -19,7 +19,6 @@ package org.apache.hadoop.fs.s3a; import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.junit.Assert; @@ -40,11 +39,11 @@ */ public class TestListing extends AbstractS3AMockTest { - private static class MockRemoteIterator implements - RemoteIterator<FileStatus> { - private Iterator<FileStatus> iterator; + private static class MockRemoteIterator implements + RemoteIterator<S3AFileStatus> { + private Iterator<S3AFileStatus> iterator; - MockRemoteIterator(Collection<FileStatus> source) { + MockRemoteIterator(Collection<S3AFileStatus> source) { @@ -52,13 +51,13 @@ public boolean hasNext() { return iterator.hasNext(); } - public FileStatus next() { + public S3AFileStatus next() { return iterator.next(); } } - private FileStatus blankFileStatus(Path path) { - return new FileStatus(0, true, 0, 0, 0, path); + private S3AFileStatus blankFileStatus(Path path) { + return new S3AFileStatus(Tristate.UNKNOWN, path, null); } @Test @@ -78,11 +77,11 @@ public void testTombstoneReconcilingIterator() throws Exception { Set<Path> tombstones = new HashSet<>(); tombstones.add(deletedChild); - RemoteIterator<FileStatus> sourceIterator = new MockRemoteIterator( + RemoteIterator<S3AFileStatus> sourceIterator = new MockRemoteIterator( statuses); - RemoteIterator<LocatedFileStatus> locatedIterator = + RemoteIterator<S3ALocatedFileStatus> locatedIterator = listing.createLocatedFileStatusIterator(sourceIterator); - RemoteIterator<LocatedFileStatus> reconcilingIterator = + RemoteIterator<S3ALocatedFileStatus> reconcilingIterator = listing.createTombstoneReconcilingIterator(locatedIterator, tombstones); Set<Path> expectedPaths = new HashSet<>(); @@ -98,8 +97,12 @@ public void testProvidedFileStatusIteratorEnd() throws Exception { - FileStatus[] statuses = { - new FileStatus(100, false, 1, 8192, 0,
new Path("s3a://blah/blah")) + S3AFileStatus s3aStatus = new S3AFileStatus( + 100, 0, new Path("s3a://blah/blah"), + 8192, null, null, null); + + S3AFileStatus[] statuses = { + s3aStatus }; ProvidedFileStatusIterator it = new ProvidedFileStatusIterator(statuses, ACCEPT_ALL, new Listing.AcceptAllButS3nDirs()); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java index f073c4c486..c645ac5ad8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestStreamChangeTracker.java @@ -20,18 +20,23 @@ import java.util.concurrent.atomic.AtomicLong; +import com.amazonaws.AmazonServiceException; +import com.amazonaws.SdkBaseException; import com.amazonaws.services.s3.Headers; +import com.amazonaws.services.s3.model.CopyObjectRequest; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.S3Object; -import org.apache.hadoop.fs.PathIOException; +import com.amazonaws.services.s3.transfer.model.CopyResult; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; import org.apache.hadoop.fs.s3a.impl.ChangeTracker; import org.apache.hadoop.test.HadoopTestBase; +import org.apache.http.HttpStatus; import static org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.CHANGE_DETECTED; import static org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.createPolicy; @@ -50,6 +55,8 @@ public class TestStreamChangeTracker extends HadoopTestBase { public static final String OBJECT = "object"; + public static final String DEST_OBJECT = "new_object"; + public static final String URI = "s3a://" + BUCKET + "/" + OBJECT; @Test @@ -161,12 +168,108 @@ public void testVersionCheckingOnServer() throws Throwable { CHANGE_DETECTED); } + @Test + public void testVersionCheckingUpfrontETag() throws Throwable { + ChangeTracker tracker = newTracker( + ChangeDetectionPolicy.Mode.Server, + ChangeDetectionPolicy.Source.ETag, + false, + objectAttributes("etag1", "versionid1")); + + assertEquals("etag1", tracker.getRevisionId()); + } + + @Test + public void testVersionCheckingUpfrontVersionId() throws Throwable { + ChangeTracker tracker = newTracker( + ChangeDetectionPolicy.Mode.Server, + ChangeDetectionPolicy.Source.VersionId, + false, + objectAttributes("etag1", "versionid1")); + + assertEquals("versionid1", tracker.getRevisionId()); + } + + @Test + public void testVersionCheckingETagCopyServer() throws Throwable { + ChangeTracker tracker = newTracker( + ChangeDetectionPolicy.Mode.Server, + ChangeDetectionPolicy.Source.VersionId, + false, + objectAttributes("etag1", "versionid1")); + assertConstraintApplied(tracker, newCopyObjectRequest()); + } + + @Test + public void testVersionCheckingETagCopyClient() throws Throwable { + ChangeTracker tracker = newTracker( + ChangeDetectionPolicy.Mode.Client, + ChangeDetectionPolicy.Source.VersionId, + false, + objectAttributes("etag1", "versionid1")); + assertFalse("Tracker should not have applied contraints " + tracker, + tracker.maybeApplyConstraint(newCopyObjectRequest())); + } + + @Test + public void testCopyVersionIdRequired() throws Throwable { + ChangeTracker tracker = newTracker( + 
ChangeDetectionPolicy.Mode.Client, + ChangeDetectionPolicy.Source.VersionId, + true, + objectAttributes("etag1", "versionId")); + + expectNoVersionAttributeException(tracker, newCopyResult("etag1", + null), + "policy requires VersionId"); + } + + @Test + public void testCopyETagRequired() throws Throwable { + ChangeTracker tracker = newTracker( + ChangeDetectionPolicy.Mode.Client, + ChangeDetectionPolicy.Source.ETag, + true, + objectAttributes("etag1", "versionId")); + + expectNoVersionAttributeException(tracker, newCopyResult(null, + "versionId"), + "policy requires ETag"); + } + + @Test + public void testCopyVersionMismatch() throws Throwable { + ChangeTracker tracker = newTracker( + ChangeDetectionPolicy.Mode.Server, + ChangeDetectionPolicy.Source.ETag, + true, + objectAttributes("etag", "versionId")); + + // 412 is translated to RemoteFileChangedException + // note: this scenario is never currently hit due to + // https://github.com/aws/aws-sdk-java/issues/1644 + AmazonServiceException awsException = + new AmazonServiceException("aws exception"); + awsException.setStatusCode(HttpStatus.SC_PRECONDITION_FAILED); + expectChangeException(tracker, awsException, "copy", + RemoteFileChangedException.PRECONDITIONS_FAILED); + + // processing another type of exception does nothing + tracker.processException(new SdkBaseException("foo"), "copy"); + } + protected void assertConstraintApplied(final ChangeTracker tracker, final GetObjectRequest request) { assertTrue("Tracker should have applied constraints " + tracker, tracker.maybeApplyConstraint(request)); } + protected void assertConstraintApplied(final ChangeTracker tracker, + final CopyObjectRequest request) throws PathIOException { + assertTrue("Tracker should have applied constraints " + tracker, + tracker.maybeApplyConstraint(request)); + } + protected RemoteFileChangedException expectChangeException( final ChangeTracker tracker, final S3Object response, @@ -175,6 +278,15 @@ protected RemoteFileChangedException expectChangeException( RemoteFileChangedException.class); } + protected RemoteFileChangedException expectChangeException( + final ChangeTracker tracker, + final SdkBaseException exception, + final String operation, + final String message) throws Exception { + return expectException(tracker, exception, operation, message, + RemoteFileChangedException.class); + } + protected PathIOException expectNoVersionAttributeException( final ChangeTracker tracker, final S3Object response, @@ -183,6 +295,14 @@ protected PathIOException expectNoVersionAttributeException( NoVersionAttributeException.class); } + protected PathIOException expectNoVersionAttributeException( + final ChangeTracker tracker, + final CopyResult response, + final String message) throws Exception { + return expectException(tracker, response, message, + NoVersionAttributeException.class); + } + protected <T extends Exception> T expectException( final ChangeTracker tracker, final S3Object response, @@ -197,6 +317,35 @@ protected <T extends Exception> T expectException( }); } + protected <T extends Exception> T expectException( + final ChangeTracker tracker, + final CopyResult response, + final String message, + final Class<T> clazz) throws Exception { + return intercept( + clazz, + message, + () -> { + tracker.processResponse(response); + return tracker; + }); + } + + protected <T extends Exception> T expectException( + final ChangeTracker tracker, + final SdkBaseException exception, + final String operation, + final String message, + final Class<T> clazz) throws Exception { + return intercept( + clazz, + message, + () -> { + tracker.processException(exception, operation);
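+ // returning the tracker here means processException() raised nothing; + // intercept() then fails the test and quotes the tracker in its message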
+ return tracker; + }); + } + protected void assertRevisionId(final ChangeTracker tracker, final String revId) { assertEquals("Wrong revision ID in " + tracker, @@ -218,14 +367,29 @@ protected void assertTrackerMismatchCount( */ protected ChangeTracker newTracker(final ChangeDetectionPolicy.Mode mode, final ChangeDetectionPolicy.Source source, boolean requireVersion) { + return newTracker(mode, source, requireVersion, + objectAttributes(null, null)); + } + + /** + * Create tracker. + * Contains standard assertion(s). + * @return the tracker. + */ + protected ChangeTracker newTracker(final ChangeDetectionPolicy.Mode mode, + final ChangeDetectionPolicy.Source source, boolean requireVersion, + S3ObjectAttributes objectAttributes) { ChangeDetectionPolicy policy = createPolicy( mode, source, requireVersion); ChangeTracker tracker = new ChangeTracker(URI, policy, - new AtomicLong(0)); - assertFalse("Tracker should not have applied constraints " + tracker, - tracker.maybeApplyConstraint(newGetObjectRequest())); + new AtomicLong(0), objectAttributes); + if (objectAttributes.getVersionId() == null + && objectAttributes.getETag() == null) { + assertFalse("Tracker should not have applied constraints " + tracker, + tracker.maybeApplyConstraint(newGetObjectRequest())); + } return tracker; } @@ -233,6 +397,21 @@ private GetObjectRequest newGetObjectRequest() { return new GetObjectRequest(BUCKET, OBJECT); } + private CopyObjectRequest newCopyObjectRequest() { + return new CopyObjectRequest(BUCKET, OBJECT, BUCKET, DEST_OBJECT); + } + + private CopyResult newCopyResult(String eTag, String versionId) { + CopyResult copyResult = new CopyResult(); + copyResult.setSourceBucketName(BUCKET); + copyResult.setSourceKey(OBJECT); + copyResult.setDestinationBucketName(BUCKET); + copyResult.setDestinationKey(DEST_OBJECT); + copyResult.setETag(eTag); + copyResult.setVersionId(versionId); + return copyResult; + } + private S3Object newResponse(String etag, String versionId) { ObjectMetadata md = new ObjectMetadata(); if (etag != null) { @@ -252,4 +431,14 @@ private S3Object emptyResponse() { response.setKey(OBJECT); return response; } + + private S3ObjectAttributes objectAttributes( + String etag, String versionId) { + return new S3ObjectAttributes(BUCKET, + OBJECT, + null, + null, + etag, + versionId); + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java index ad4691a6d9..589628c5c9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/AbstractS3GuardToolTestBase.java @@ -221,7 +221,7 @@ protected void createFile(Path path, boolean onS3, boolean onMetadataStore) ContractTestUtils.touch(fs, path); } else if (onMetadataStore) { S3AFileStatus status = new S3AFileStatus(100L, System.currentTimeMillis(), - fs.qualify(path), 512L, "hdfs"); + fs.qualify(path), 512L, "hdfs", null, null); putFile(ms, status); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java index 158f13d3fc..972cbe5f5e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java +++
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStore.java @@ -246,12 +246,13 @@ public DynamoDBMSContract createContract(Configuration conf) { } @Override - FileStatus basicFileStatus(Path path, int size, boolean isDir) + S3AFileStatus basicFileStatus(Path path, int size, boolean isDir) throws IOException { String owner = UserGroupInformation.getCurrentUser().getShortUserName(); return isDir ? new S3AFileStatus(true, path, owner) - : new S3AFileStatus(size, getModTime(), path, BLOCK_SIZE, owner); + : new S3AFileStatus(size, getModTime(), path, BLOCK_SIZE, owner, + null, null); } private DynamoDBMetadataStore getDynamoMetadataStore() throws IOException { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java index 408c72eb8f..301ba16aec 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestDynamoDBMetadataStoreScale.java @@ -41,7 +41,6 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.StorageStatistics; import org.apache.hadoop.fs.contract.ContractTestUtils; @@ -231,7 +230,7 @@ public void test_030_BatchedWrite() throws Exception { long pruneItems = 0; for (long i = 0; i < iterations; i++) { Path longPath = pathOfDepth(BATCH_SIZE, String.valueOf(i)); - FileStatus status = basicFileStatus(longPath, 0, false, 12345, + S3AFileStatus status = basicFileStatus(longPath, 0, false, 12345); PathMetadata pm = new PathMetadata(status); synchronized (toCleanup) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java index 6a4d45e9ea..f81f0e2bc1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java @@ -37,7 +37,9 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.Tristate; import static org.apache.hadoop.fs.s3a.MultipartTestUtils.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getLandsatCSVFile; @@ -95,6 +97,41 @@ public void testImportCommand() throws Exception { // assertTrue(children.isAuthoritative()); } + @Test + public void testImportCommandRepairsETagAndVersionId() throws Exception { + S3AFileSystem fs = getFileSystem(); + MetadataStore ms = getMetadataStore(); + Path path = path("test-version-metadata"); + try (FSDataOutputStream out = fs.create(path)) { + out.write(1); + } + S3AFileStatus originalStatus = (S3AFileStatus) fs.getFileStatus(path); + + // put in bogus ETag and versionId + S3AFileStatus bogusStatus = S3AFileStatus.fromFileStatus(originalStatus, + Tristate.FALSE, "bogusETag", "bogusVersionId"); + ms.put(new PathMetadata(bogusStatus)); + + // sanity check that bogus status is actually persisted + S3AFileStatus retrievedBogusStatus = (S3AFileStatus) fs.getFileStatus(path); + assertEquals("bogus ETag 
was not persisted", + "bogusETag", retrievedBogusStatus.getETag()); + assertEquals("bogus versionId was not persisted", + "bogusVersionId", retrievedBogusStatus.getVersionId()); + + // execute the import + S3GuardTool.Import cmd = new S3GuardTool.Import(fs.getConf()); + cmd.setStore(ms); + exec(cmd, "import", path.toString()); + + // make sure ETag and versionId were corrected + S3AFileStatus updatedStatus = (S3AFileStatus) fs.getFileStatus(path); + assertEquals("ETag was not corrected", + originalStatus.getETag(), updatedStatus.getETag()); + assertEquals("VersionId was not corrected", + originalStatus.getVersionId(), updatedStatus.getVersionId()); + } + @Test public void testDestroyBucketExistsButNoTable() throws Throwable { run(Destroy.NAME, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java index 799c5a046b..47544f4eb6 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java @@ -38,6 +38,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.apache.hadoop.fs.s3a.Tristate; import org.apache.hadoop.io.IOUtils; @@ -61,11 +62,13 @@ public abstract class MetadataStoreTestBase extends HadoopTestBase { /** Some dummy values for sanity-checking FileStatus contents. */ static final long BLOCK_SIZE = 32 * 1024 * 1024; static final int REPLICATION = 1; - static final FsPermission PERMISSION = new FsPermission((short)0755); static final String OWNER = "bob"; - static final String GROUP = "uncles"; - private final long accessTime = System.currentTimeMillis(); - private final long modTime = accessTime - 5000; + private final long modTime = System.currentTimeMillis() - 5000; + + // attributes not supported by S3AFileStatus + static final FsPermission PERMISSION = null; + static final String GROUP = null; + private final long accessTime = 0; /** * Each test should override this. Will use a new Configuration instance. @@ -148,14 +151,14 @@ private void doTestDescendantsIterator( String[] checkNodes) throws Exception { // we set up the example file system tree in metadata store for (String pathStr : createNodes) { - final FileStatus status = pathStr.contains("file") + final S3AFileStatus status = pathStr.contains("file") ? 
basicFileStatus(strToPath(pathStr), 100, false) : basicFileStatus(strToPath(pathStr), 0, true); ms.put(new PathMetadata(status)); } final PathMetadata rootMeta = new PathMetadata(makeDirStatus("/")); - RemoteIterator<FileStatus> iterator; + RemoteIterator<S3AFileStatus> iterator; if (implementation == DescendantsIterator.class) { iterator = new DescendantsIterator(ms, rootMeta); } else if (implementation == MetadataStoreListFilesIterator.class) { @@ -647,8 +650,7 @@ public void testPruneFiles() throws Exception { createNewDirs("/pruneFiles"); long oldTime = getTime(); - ms.put(new PathMetadata(makeFileStatus("/pruneFiles/old", 1, oldTime, - oldTime))); + ms.put(new PathMetadata(makeFileStatus("/pruneFiles/old", 1, oldTime))); DirListingMetadata ls2 = ms.listChildren(strToPath("/pruneFiles")); if (!allowMissing()) { assertListingsEqual(ls2.getListing(), "/pruneFiles/old"); @@ -659,8 +661,7 @@ public void testPruneFiles() throws Exception { Thread.sleep(1); long cutoff = System.currentTimeMillis(); long newTime = getTime(); - ms.put(new PathMetadata(makeFileStatus("/pruneFiles/new", 1, newTime, - newTime))); + ms.put(new PathMetadata(makeFileStatus("/pruneFiles/new", 1, newTime))); DirListingMetadata ls; ls = ms.listChildren(strToPath("/pruneFiles")); @@ -691,7 +692,7 @@ public void testPruneDirs() throws Exception { long oldTime = getTime(); ms.put(new PathMetadata(makeFileStatus("/pruneDirs/dir/file", - 1, oldTime, oldTime))); + 1, oldTime))); // It's possible for the Local implementation to get from the old // modification time to here in under 1ms, causing it to not get pruned @@ -715,10 +716,10 @@ public void testPruneUnsetsAuthoritative() throws Exception { createNewDirs(rootDir, grandparentDir, parentDir); long time = System.currentTimeMillis(); ms.put(new PathMetadata( - new FileStatus(0, false, 0, 0, time - 1, strToPath(staleFile)), + basicFileStatus(0, false, 0, time - 1, strToPath(staleFile)), Tristate.FALSE, false)); ms.put(new PathMetadata( - new FileStatus(0, false, 0, 0, time + 1, strToPath(freshFile)), + basicFileStatus(0, false, 0, time + 1, strToPath(freshFile)), Tristate.FALSE, false)); // set parent dir as authoritative @@ -752,10 +753,10 @@ public void testPrunePreservesAuthoritative() throws Exception { createNewDirs(rootDir, grandparentDir, parentDir); long time = System.currentTimeMillis(); ms.put(new PathMetadata( - new FileStatus(0, false, 0, 0, time + 1, strToPath(staleFile)), + basicFileStatus(0, false, 0, time + 1, strToPath(staleFile)), Tristate.FALSE, false)); ms.put(new PathMetadata( - new FileStatus(0, false, 0, 0, time + 1, strToPath(freshFile)), + basicFileStatus(0, false, 0, time + 1, strToPath(freshFile)), Tristate.FALSE, false)); if (!allowMissing()) { @@ -811,7 +812,7 @@ public void testPutDirListingMetadataPutsFileMetadata() @Test public void testPutRetainsIsDeletedInParentListing() throws Exception { final Path path = strToPath("/a/b"); - final FileStatus fileStatus = basicFileStatus(path, 0, false); + final S3AFileStatus fileStatus = basicFileStatus(path, 0, false); PathMetadata pm = new PathMetadata(fileStatus); pm.setIsDeleted(true); ms.put(pm); @@ -948,40 +949,54 @@ private void assertEmptyDirs(String ...dirs) throws IOException { } } - FileStatus basicFileStatus(Path path, int size, boolean isDir) throws + S3AFileStatus basicFileStatus(Path path, int size, boolean isDir) throws IOException { - return basicFileStatus(path, size, isDir, modTime, accessTime); + return basicFileStatus(path, size, isDir, modTime); } - public static FileStatus basicFileStatus(Path path, int
size, boolean isDir, - long newModTime, long newAccessTime) throws IOException { - return new FileStatus(size, isDir, REPLICATION, BLOCK_SIZE, newModTime, - newAccessTime, PERMISSION, OWNER, GROUP, path); + S3AFileStatus basicFileStatus(int size, boolean isDir, + long blockSize, long modificationTime, Path path) { + if (isDir) { + return new S3AFileStatus(Tristate.UNKNOWN, path, null); + } else { + return new S3AFileStatus(size, modificationTime, path, blockSize, null, + null, null); + } } - private FileStatus makeFileStatus(String pathStr, int size) throws + public static S3AFileStatus basicFileStatus(Path path, int size, + boolean isDir, long newModTime) throws IOException { + if (isDir) { + return new S3AFileStatus(Tristate.UNKNOWN, path, OWNER); + } else { + return new S3AFileStatus(size, newModTime, path, BLOCK_SIZE, OWNER, + null, null); + } + } + + private S3AFileStatus makeFileStatus(String pathStr, int size) throws IOException { - return makeFileStatus(pathStr, size, modTime, accessTime); + return makeFileStatus(pathStr, size, modTime); } - private FileStatus makeFileStatus(String pathStr, int size, long newModTime, - long newAccessTime) throws IOException { + private S3AFileStatus makeFileStatus(String pathStr, int size, + long newModTime) throws IOException { return basicFileStatus(strToPath(pathStr), size, false, - newModTime, newAccessTime); + newModTime); } void verifyFileStatus(FileStatus status, long size) { S3ATestUtils.verifyFileStatus(status, size, BLOCK_SIZE, modTime); } - private FileStatus makeDirStatus(String pathStr) throws IOException { - return basicFileStatus(strToPath(pathStr), 0, true, modTime, accessTime); + private S3AFileStatus makeDirStatus(String pathStr) throws IOException { + return basicFileStatus(strToPath(pathStr), 0, true, modTime); } /** * Verify the directory file status. Subclass may verify additional fields. 
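* With S3AFileStatus the remaining directory attributes are synthetic: * modification time is assigned when the status is created, access time is * zero, the group mirrors the owner and the permission is the default, as * the updated S3ATestUtils#verifyDirStatus above now asserts.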
 */ - void verifyDirStatus(FileStatus status) { + void verifyDirStatus(S3AFileStatus status) { assertTrue("Is a dir", status.isDirectory()); assertEquals("zero length", 0, status.getLen()); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDirListingMetadata.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDirListingMetadata.java index 8458252af7..cb183a2954 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDirListingMetadata.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDirListingMetadata.java @@ -38,6 +38,8 @@ public class TestDirListingMetadata { private static final String TEST_OWNER = "hadoop"; + public static final String TEST_ETAG = "abc"; + public static final String TEST_VERSION_ID = "def"; @Rule public ExpectedException exception = ExpectedException.none(); @@ -79,7 +81,8 @@ public void testListing() { PathMetadata pathMeta2 = new PathMetadata( new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER)); PathMetadata pathMeta3 = new PathMetadata( - new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER)); + new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER, + TEST_ETAG, TEST_VERSION_ID)); List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3); DirListingMetadata meta = new DirListingMetadata(path, listing, false); assertEquals(path, meta.getPath()); @@ -130,7 +133,8 @@ public void testGet() { PathMetadata pathMeta2 = new PathMetadata( new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER)); PathMetadata pathMeta3 = new PathMetadata( - new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER)); + new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER, + TEST_ETAG, TEST_VERSION_ID)); List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3); DirListingMetadata meta = new DirListingMetadata(path, listing, false); assertEquals(path, meta.getPath()); @@ -181,7 +185,8 @@ public void testPut() { PathMetadata pathMeta2 = new PathMetadata( new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER)); PathMetadata pathMeta3 = new PathMetadata( - new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER)); + new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER, + TEST_ETAG, TEST_VERSION_ID)); List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3); DirListingMetadata meta = new DirListingMetadata(path, listing, false); assertEquals(path, meta.getPath()); @@ -243,7 +248,8 @@ public void testRemove() { PathMetadata pathMeta2 = new PathMetadata( new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER)); PathMetadata pathMeta3 = new PathMetadata( - new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER)); + new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER, + TEST_ETAG, TEST_VERSION_ID)); List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3); DirListingMetadata meta = new DirListingMetadata(path, listing, false); assertEquals(path, meta.getPath()); @@ -296,7 +302,7 @@ private static DirListingMetadata makeTwoDirsOneFile(Path parent) { new S3AFileStatus(true, new Path(parent, "dir2"), TEST_OWNER)); PathMetadata pathMeta3 = new PathMetadata( new S3AFileStatus(123, 456, new Path(parent, "file1"), 8192, - TEST_OWNER)); + TEST_OWNER, TEST_ETAG, TEST_VERSION_ID)); List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3); return new DirListingMetadata(parent, listing, false); } diff --git
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java index 2ea20b26b0..d0156f13e8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestLocalMetadataStore.java @@ -30,7 +30,9 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3ATestUtils; +import org.apache.hadoop.fs.s3a.Tristate; /** * MetadataStore unit test for {@link LocalMetadataStore}. @@ -168,8 +170,8 @@ private static void populateMap(Cache<Path, LocalMetadataEntry> cache, private static void populateEntry(Cache<Path, LocalMetadataEntry> cache, Path path) { - FileStatus fileStatus = new FileStatus(0, true, 0, 0, 0, path); - cache.put(path, new LocalMetadataEntry(new PathMetadata(fileStatus))); + S3AFileStatus s3aStatus = new S3AFileStatus(Tristate.UNKNOWN, path, null); + cache.put(path, new LocalMetadataEntry(new PathMetadata(s3aStatus))); } private static long sizeOfMap(Cache<Path, LocalMetadataEntry> cache) { @@ -196,9 +198,8 @@ protected void verifyFileStatus(FileStatus status, long size) { } @Override - protected void verifyDirStatus(FileStatus status) { - S3ATestUtils.verifyDirStatus(status, REPLICATION, getModTime(), - getAccessTime(), OWNER, GROUP, PERMISSION); + protected void verifyDirStatus(S3AFileStatus status) { + S3ATestUtils.verifyDirStatus(status, REPLICATION, OWNER); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java new file mode 100644 index 0000000000..f001262b36 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestObjectChangeDetectionAttributes.java @@ -0,0 +1,380 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.fs.s3a.s3guard; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; + +import com.amazonaws.services.s3.Headers; +import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; +import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; +import com.amazonaws.services.s3.model.GetObjectMetadataRequest; +import com.amazonaws.services.s3.model.GetObjectRequest; +import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; +import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; +import com.amazonaws.services.s3.model.ListObjectsV2Request; +import com.amazonaws.services.s3.model.ListObjectsV2Result; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.PutObjectRequest; +import com.amazonaws.services.s3.model.PutObjectResult; +import com.amazonaws.services.s3.model.S3Object; +import com.amazonaws.services.s3.model.UploadPartRequest; +import com.amazonaws.services.s3.model.UploadPartResult; +import org.hamcrest.BaseMatcher; +import org.hamcrest.Description; +import org.hamcrest.Matcher; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.AbstractS3AMockTest; +import org.apache.hadoop.fs.s3a.Constants; +import org.apache.hadoop.fs.s3a.S3AFileStatus; + +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE_SERVER; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE_ETAG; +import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE_VERSION_ID; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.when; +import static org.mockito.hamcrest.MockitoHamcrest.argThat; + +/** + * Unit tests to ensure object eTag and versionId are captured on S3 PUT and + * used on GET. + * Further (integration) testing is performed in + * {@link org.apache.hadoop.fs.s3a.ITestS3ARemoteFileChanged}. + */ +@RunWith(Parameterized.class) +public class TestObjectChangeDetectionAttributes extends AbstractS3AMockTest { + private final String changeDetectionSource; + + public TestObjectChangeDetectionAttributes(String changeDetectionSource) { + this.changeDetectionSource = changeDetectionSource; + } + + @Parameterized.Parameters(name = "change={0}") + public static Collection<Object[]> params() { + return Arrays.asList(new Object[][]{ + {CHANGE_DETECT_SOURCE_ETAG}, + {CHANGE_DETECT_SOURCE_VERSION_ID} + }); + } + + @Override + public Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + conf.setClass(Constants.S3_METADATA_STORE_IMPL, + LocalMetadataStore.class, MetadataStore.class); + conf.set(CHANGE_DETECT_SOURCE, changeDetectionSource); + conf.set(CHANGE_DETECT_MODE, CHANGE_DETECT_MODE_SERVER); + return conf; + } + + /** + * Tests a file uploaded with a single PUT to ensure eTag is captured and used + * on file read.
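+ * The S3 client is mocked: the stubbed PUT response supplies the eTag and + * versionId, the write path records them in the MetadataStore, and the + * subsequent read only succeeds when the GetObjectRequest carries the same + * revision (see correctGetObjectRequest below).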
+ */ + @Test + public void testCreateAndReadFileSinglePart() throws Exception { + String bucket = "s3a://mock-bucket/"; + String file = "single-part-file"; + Path path = new Path(bucket, file); + byte[] content = "content".getBytes(); + String eTag = "abc"; + String versionId = "def"; + + putObject(file, path, content, eTag, versionId); + + // make sure the eTag and versionId were put into the metadataStore + assertVersionAttributes(path, eTag, versionId); + + // Ensure underlying S3 getObject call uses the stored eTag or versionId + // when reading data back. If it doesn't, the read won't work and the + // assert will fail. + assertContent(file, path, content, eTag, versionId); + + // test overwrite + byte[] newContent = "newcontent".getBytes(); + String newETag = "newETag"; + String newVersionId = "newVersionId"; + + putObject(file, path, newContent, newETag, newVersionId); + assertVersionAttributes(path, newETag, newVersionId); + assertContent(file, path, newContent, newETag, newVersionId); + } + + /** + * Tests a file uploaded with multi-part upload to ensure eTag is captured + * and used on file read. + */ + @Test + public void testCreateAndReadFileMultiPart() throws Exception { + String bucket = "s3a://mock-bucket/"; + String file = "multi-part-file"; + Path path = new Path(bucket, file); + byte[] content = new byte[Constants.MULTIPART_MIN_SIZE + 1]; + String eTag = "abc"; + String versionId = "def"; + + multipartUpload(file, path, content, eTag, versionId); + + // make sure the eTag and versionId were put into the metadataStore + assertVersionAttributes(path, eTag, versionId); + + // Ensure underlying S3 getObject call uses the stored eTag or versionId + // when reading data back. If it doesn't, the read won't work and the + // assert will fail.
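+ // (assertContent only stubs getObject for a request carrying the expected + // eTag constraint or versionId, so an unconstrained read gets nothing back)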
+ assertContent(file, path, content, eTag, versionId); + + // test overwrite + byte[] newContent = new byte[Constants.MULTIPART_MIN_SIZE + 1]; + Arrays.fill(newContent, (byte) 1); + String newETag = "newETag"; + String newVersionId = "newVersionId"; + + multipartUpload(file, path, newContent, newETag, newVersionId); + assertVersionAttributes(path, newETag, newVersionId); + assertContent(file, path, newContent, newETag, newVersionId); + } + + private void putObject(String file, Path path, byte[] content, + String eTag, String versionId) throws IOException { + PutObjectResult putObjectResult = new PutObjectResult(); + ObjectMetadata objectMetadata = new ObjectMetadata(); + objectMetadata.setContentLength(content.length); + putObjectResult.setMetadata(objectMetadata); + putObjectResult.setETag(eTag); + putObjectResult.setVersionId(versionId); + + when(s3.getObjectMetadata(any(GetObjectMetadataRequest.class))) + .thenThrow(NOT_FOUND); + when(s3.putObject(argThat(correctPutObjectRequest(file)))) + .thenReturn(putObjectResult); + ListObjectsV2Result emptyListing = new ListObjectsV2Result(); + when(s3.listObjectsV2(argThat(correctListObjectsRequest(file + "/")))) + .thenReturn(emptyListing); + + FSDataOutputStream outputStream = fs.create(path); + outputStream.write(content); + outputStream.close(); + } + + private void multipartUpload(String file, Path path, byte[] content, + String eTag, String versionId) throws IOException { + CompleteMultipartUploadResult uploadResult = + new CompleteMultipartUploadResult(); + uploadResult.setVersionId(versionId); + + when(s3.getObjectMetadata(any(GetObjectMetadataRequest.class))) + .thenThrow(NOT_FOUND); + + InitiateMultipartUploadResult initiateMultipartUploadResult = + new InitiateMultipartUploadResult(); + initiateMultipartUploadResult.setUploadId("uploadId"); + when(s3.initiateMultipartUpload( + argThat(correctInitiateMultipartUploadRequest(file)))) + .thenReturn(initiateMultipartUploadResult); + + UploadPartResult uploadPartResult = new UploadPartResult(); + uploadPartResult.setETag("partETag"); + when(s3.uploadPart(argThat(correctUploadPartRequest(file)))) + .thenReturn(uploadPartResult); + + CompleteMultipartUploadResult multipartUploadResult = + new CompleteMultipartUploadResult(); + multipartUploadResult.setETag(eTag); + multipartUploadResult.setVersionId(versionId); + when(s3.completeMultipartUpload( + argThat(correctMultipartUploadRequest(file)))) + .thenReturn(multipartUploadResult); + + ListObjectsV2Result emptyListing = new ListObjectsV2Result(); + when(s3.listObjectsV2(argThat(correctListObjectsRequest(file + "/")))) + .thenReturn(emptyListing); + + FSDataOutputStream outputStream = fs.create(path); + outputStream.write(content); + outputStream.close(); + } + + private void assertContent(String file, Path path, byte[] content, + String eTag, String versionId) throws IOException { + S3Object s3Object = new S3Object(); + ObjectMetadata metadata = new ObjectMetadata(); + metadata.setHeader(Headers.S3_VERSION_ID, versionId); + metadata.setHeader(Headers.ETAG, eTag); + s3Object.setObjectMetadata(metadata); + s3Object.setObjectContent(new ByteArrayInputStream(content)); + when(s3.getObject(argThat(correctGetObjectRequest(file, eTag, versionId)))) + .thenReturn(s3Object); + FSDataInputStream inputStream = fs.open(path); + byte[] readContent = IOUtils.toByteArray(inputStream); + assertArrayEquals(content, readContent); + } + + private void assertVersionAttributes(Path path, String eTag, String versionId) + throws IOException { + MetadataStore 
metadataStore = fs.getMetadataStore(); + PathMetadata pathMetadata = metadataStore.get(path); + assertNotNull(pathMetadata); + S3AFileStatus fileStatus = pathMetadata.getFileStatus(); + assertEquals(eTag, fileStatus.getETag()); + assertEquals(versionId, fileStatus.getVersionId()); + } + + private Matcher<GetObjectRequest> correctGetObjectRequest(final String key, + final String eTag, final String versionId) { + return new BaseMatcher<GetObjectRequest>() { + @Override + public boolean matches(Object item) { + if (item instanceof GetObjectRequest) { + GetObjectRequest getObjectRequest = (GetObjectRequest) item; + if (getObjectRequest.getKey().equals(key)) { + if (changeDetectionSource.equals( + CHANGE_DETECT_SOURCE_ETAG)) { + return getObjectRequest.getMatchingETagConstraints() + .contains(eTag); + } else if (changeDetectionSource.equals( + CHANGE_DETECT_SOURCE_VERSION_ID)) { + return getObjectRequest.getVersionId().equals(versionId); + } + } + } + return false; + } + + @Override + public void describeTo(Description description) { + description.appendText("key and " + + changeDetectionSource + + " matches"); + } + }; + } + + private Matcher<UploadPartRequest> correctUploadPartRequest( + final String key) { + return new BaseMatcher<UploadPartRequest>() { + @Override + public boolean matches(Object item) { + if (item instanceof UploadPartRequest) { + UploadPartRequest request = (UploadPartRequest) item; + return request.getKey().equals(key); + } + return false; + } + + @Override + public void describeTo(Description description) { + description.appendText("key matches"); + } + }; + } + + private Matcher<InitiateMultipartUploadRequest> + correctInitiateMultipartUploadRequest(final String key) { + return new BaseMatcher<InitiateMultipartUploadRequest>() { + @Override + public void describeTo(Description description) { + description.appendText("key matches"); + } + + @Override + public boolean matches(Object item) { + if (item instanceof InitiateMultipartUploadRequest) { + InitiateMultipartUploadRequest request = + (InitiateMultipartUploadRequest) item; + return request.getKey().equals(key); + } + return false; + } + }; + } + + private Matcher<CompleteMultipartUploadRequest> + correctMultipartUploadRequest(final String key) { + return new BaseMatcher<CompleteMultipartUploadRequest>() { + @Override + public boolean matches(Object item) { + if (item instanceof CompleteMultipartUploadRequest) { + CompleteMultipartUploadRequest request = + (CompleteMultipartUploadRequest) item; + return request.getKey().equals(key); + } + return false; + } + + @Override + public void describeTo(Description description) { + description.appendText("key matches"); + } + }; + } + + private Matcher<ListObjectsV2Request> correctListObjectsRequest( + final String key) { + return new BaseMatcher<ListObjectsV2Request>() { + @Override + public boolean matches(Object item) { + if (item instanceof ListObjectsV2Request) { + ListObjectsV2Request listObjectsRequest = + (ListObjectsV2Request) item; + return listObjectsRequest.getPrefix().equals(key); + } + return false; + } + + @Override + public void describeTo(Description description) { + description.appendText("key matches"); + } + }; + } + + private Matcher<PutObjectRequest> correctPutObjectRequest( + final String key) { + return new BaseMatcher<PutObjectRequest>() { + @Override + public boolean matches(Object item) { + if (item instanceof PutObjectRequest) { + PutObjectRequest putObjectRequest = (PutObjectRequest) item; + return putObjectRequest.getKey().equals(key); + } + return false; + } + + @Override + public void describeTo(Description description) { + description.appendText("key matches"); + } + }; + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java index 704f51e3c0..70bf901514 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestPathMetadataDynamoDBTranslation.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.net.URI; +import java.util.Arrays; import java.util.Collection; import java.util.concurrent.Callable; @@ -31,16 +32,16 @@ import com.google.common.base.Preconditions; import org.junit.After; import org.junit.Assert; -import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.LambdaTestUtils; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.mockito.Mockito; import static com.amazonaws.services.dynamodbv2.model.KeyType.HASH; @@ -58,41 +59,75 @@ * Test the PathMetadataDynamoDBTranslation is able to translate between domain * model objects and DynamoDB items. */ +@RunWith(Parameterized.class) public class TestPathMetadataDynamoDBTranslation extends Assert { private static final Path TEST_DIR_PATH = new Path("s3a://test-bucket/myDir"); - private static final Item TEST_DIR_ITEM = new Item(); - private static DDBPathMetadata testDirPathMetadata; - private static final long TEST_FILE_LENGTH = 100; private static final long TEST_MOD_TIME = 9999; private static final long TEST_BLOCK_SIZE = 128; + private static final String TEST_ETAG = "abc"; + private static final String TEST_VERSION_ID = "def"; private static final Path TEST_FILE_PATH = new Path(TEST_DIR_PATH, "myFile"); - private static final Item TEST_FILE_ITEM = new Item(); - private static DDBPathMetadata testFilePathMetadata; - @BeforeClass - public static void setUpBeforeClass() throws IOException { + private final Item testFileItem; + private final DDBPathMetadata testFilePathMetadata; + private final Item testDirItem; + private final DDBPathMetadata testDirPathMetadata; + + @Parameterized.Parameters + public static Collection<Object[]> params() throws IOException { + String username = UserGroupInformation.getCurrentUser().getShortUserName(); + + return Arrays.asList(new Object[][]{ + // with etag and versionId + { + new Item() + .withPrimaryKey(PARENT, + pathToParentKey(TEST_FILE_PATH.getParent()), + CHILD, TEST_FILE_PATH.getName()) + .withBoolean(IS_DIR, false) + .withLong(FILE_LENGTH, TEST_FILE_LENGTH) + .withLong(MOD_TIME, TEST_MOD_TIME) + .withLong(BLOCK_SIZE, TEST_BLOCK_SIZE) + .withString(ETAG, TEST_ETAG) + .withString(VERSION_ID, TEST_VERSION_ID), + new DDBPathMetadata( + new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME, + TEST_FILE_PATH, TEST_BLOCK_SIZE, username, TEST_ETAG, + TEST_VERSION_ID)) + }, + // without etag or versionId + { + new Item() + .withPrimaryKey(PARENT, + pathToParentKey(TEST_FILE_PATH.getParent()), + CHILD, TEST_FILE_PATH.getName()) + .withBoolean(IS_DIR, false) + .withLong(FILE_LENGTH, TEST_FILE_LENGTH) + .withLong(MOD_TIME, TEST_MOD_TIME) + .withLong(BLOCK_SIZE, TEST_BLOCK_SIZE), + new DDBPathMetadata( + new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME, + TEST_FILE_PATH, TEST_BLOCK_SIZE, username, null, null)) + } + }); + } + + public TestPathMetadataDynamoDBTranslation(Item item, +
DDBPathMetadata metadata) throws IOException { + testFileItem = item; + testFilePathMetadata = metadata; + String username = UserGroupInformation.getCurrentUser().getShortUserName(); testDirPathMetadata = new DDBPathMetadata(new S3AFileStatus(false, TEST_DIR_PATH, username)); - TEST_DIR_ITEM + testDirItem = new Item(); + testDirItem .withPrimaryKey(PARENT, "/test-bucket", CHILD, TEST_DIR_PATH.getName()) .withBoolean(IS_DIR, true); - - testFilePathMetadata = new DDBPathMetadata( - new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME, TEST_FILE_PATH, - TEST_BLOCK_SIZE, username)); - - TEST_FILE_ITEM - .withPrimaryKey(PARENT, pathToParentKey(TEST_FILE_PATH.getParent()), - CHILD, TEST_FILE_PATH.getName()) - .withBoolean(IS_DIR, false) - .withLong(FILE_LENGTH, TEST_FILE_LENGTH) - .withLong(MOD_TIME, TEST_MOD_TIME) - .withLong(BLOCK_SIZE, TEST_BLOCK_SIZE); } /** @@ -138,8 +173,8 @@ public void testItemToPathMetadata() throws IOException { UserGroupInformation.getCurrentUser().getShortUserName(); assertNull(itemToPathMetadata(null, user)); - verify(TEST_DIR_ITEM, itemToPathMetadata(TEST_DIR_ITEM, user)); - verify(TEST_FILE_ITEM, itemToPathMetadata(TEST_FILE_ITEM, user)); + verify(testDirItem, itemToPathMetadata(testDirItem, user)); + verify(testFileItem, itemToPathMetadata(testFileItem, user)); } /** @@ -147,7 +182,7 @@ public void testItemToPathMetadata() throws IOException { */ private static void verify(Item item, PathMetadata meta) { assertNotNull(meta); - final FileStatus status = meta.getFileStatus(); + final S3AFileStatus status = meta.getFileStatus(); final Path path = status.getPath(); assertEquals(item.get(PARENT), pathToParentKey(path.getParent())); assertEquals(item.get(CHILD), path.getName()); @@ -157,6 +192,10 @@ private static void verify(Item item, PathMetadata meta) { assertEquals(len, status.getLen()); long bSize = item.hasAttribute(BLOCK_SIZE) ? 
item.getLong(BLOCK_SIZE) : 0; assertEquals(bSize, status.getBlockSize()); + String eTag = item.getString(ETAG); + assertEquals(eTag, status.getETag()); + String versionId = item.getString(VERSION_ID); + assertEquals(versionId, status.getVersionId()); /* * S3AFileStatus#getModificationTime() reports the current time, so the @@ -252,7 +291,7 @@ public void testVersionMarkerNotStatusIllegalPath() throws Throwable { @Test public void testIsAuthoritativeCompatibilityItemToPathMetadata() throws Exception { - Item item = Mockito.spy(TEST_DIR_ITEM); + Item item = Mockito.spy(testDirItem); item.withBoolean(IS_AUTHORITATIVE, true); PathMetadataDynamoDBTranslation.IGNORED_FIELDS.add(IS_AUTHORITATIVE); @@ -288,7 +327,7 @@ public void testIsAuthoritativeCompatibilityPathMetadataToItem() { @Test public void testIsLastUpdatedCompatibilityItemToPathMetadata() throws Exception { - Item item = Mockito.spy(TEST_DIR_ITEM); + Item item = Mockito.spy(testDirItem); item.withLong(LAST_UPDATED, 100); PathMetadataDynamoDBTranslation.IGNORED_FIELDS.add(LAST_UPDATED); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java index 1ddfed414d..b246da2d50 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3Guard.java @@ -26,6 +26,8 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileStatus; +import org.apache.hadoop.fs.s3a.Tristate; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_METADATASTORE_AUTHORITATIVE_DIR_TTL; @@ -51,7 +53,7 @@ public void testDirListingUnion() throws Exception { Arrays.asList(m1, m2), false); // Two other files in s3 - List<FileStatus> s3Listing = Arrays.asList( + List<S3AFileStatus> s3Listing = Arrays.asList( makeFileStatus("s3a://bucket/dir/s3-file3", false), makeFileStatus("s3a://bucket/dir/s3-file4", false) ); @@ -86,12 +88,15 @@ private PathMetadata makePathMeta(String pathStr, boolean isDir) { return new PathMetadata(makeFileStatus(pathStr, isDir)); } - private FileStatus makeFileStatus(String pathStr, boolean isDir) { + private S3AFileStatus makeFileStatus(String pathStr, boolean isDir) { Path p = new Path(pathStr); + S3AFileStatus fileStatus; if (isDir) { - return new FileStatus(0, true, 1, 1, System.currentTimeMillis(), p); + fileStatus = new S3AFileStatus(Tristate.UNKNOWN, p, null); } else { - return new FileStatus(100, false, 1, 1, System.currentTimeMillis(), p); + fileStatus = new S3AFileStatus( + 100, System.currentTimeMillis(), p, 1, null, null, null); } + return fileStatus; } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java index 0e6a1d8d09..b843392ebf 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractITestS3AMetadataStoreScale.java @@ -170,7 +170,8 @@ protected S3AFileStatus copyStatus(S3AFileStatus status) { status.getOwner()); } else { return new S3AFileStatus(status.getLen(), status.getModificationTime(), - status.getPath(), status.getBlockSize(), status.getOwner()); + status.getPath(), status.getBlockSize(), status.getOwner(), +
status.getETag(), status.getVersionId()); } } @@ -207,7 +208,8 @@ private static void printTiming(Logger log, String op, NanoTimer timer, } protected static S3AFileStatus makeFileStatus(Path path) throws IOException { - return new S3AFileStatus(SIZE, ACCESS_TIME, path, BLOCK_SIZE, OWNER); + return new S3AFileStatus(SIZE, ACCESS_TIME, path, BLOCK_SIZE, OWNER, + null, null); } protected static S3AFileStatus makeDirStatus(Path p) throws IOException { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectCLI.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectCLI.java index fccf708fef..e31b48e5b5 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectCLI.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectCLI.java @@ -25,6 +25,7 @@ import java.nio.charset.Charset; import java.util.List; +import org.junit.Assume; import org.junit.Test; import org.apache.commons.io.IOUtils; @@ -34,6 +35,8 @@ import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.apache.hadoop.fs.s3a.Statistic; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool; import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.OperationDuration; @@ -80,6 +83,11 @@ public void setup() throws Exception { selectConf = new Configuration(getConfiguration()); localFile = getTempFilename(); landsatSrc = getLandsatGZ().toString(); + ChangeDetectionPolicy changeDetectionPolicy = + getLandsatFS().getChangeDetectionPolicy(); + Assume.assumeFalse("the standard landsat bucket doesn't have versioning", + changeDetectionPolicy.getSource() == Source.VersionId + && changeDetectionPolicy.isRequireVersion()); } @Override diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java index 78f3a6d1fe..2099edd248 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.List; +import org.junit.Assume; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,6 +33,8 @@ import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.S3AInstrumentation; import org.apache.hadoop.fs.s3a.S3ATestUtils; @@ -190,6 +193,11 @@ public void setup() throws Exception { // disable the gzip codec, so that the record readers do not // get confused enablePassthroughCodec(selectConf, ".gz"); + ChangeDetectionPolicy changeDetectionPolicy = + getLandsatFS().getChangeDetectionPolicy(); + Assume.assumeFalse("the standard landsat bucket doesn't have versioning", + changeDetectionPolicy.getSource() == Source.VersionId + && changeDetectionPolicy.isRequireVersion()); } protected int getMaxLines() { diff --git 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectMRJob.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectMRJob.java index ee7de8c7ac..181d797767 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectMRJob.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectMRJob.java @@ -21,6 +21,9 @@ import java.io.IOException; import java.util.concurrent.atomic.AtomicLong; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source; +import org.junit.Assume; import org.junit.Test; import org.apache.hadoop.conf.Configuration; @@ -90,6 +93,13 @@ public class ITestS3SelectMRJob extends AbstractS3SelectTest { public void setup() throws Exception { super.setup(); fs = S3ATestUtils.createTestFileSystem(conf); + + ChangeDetectionPolicy changeDetectionPolicy = + getLandsatFS().getChangeDetectionPolicy(); + Assume.assumeFalse("the standard landsat bucket doesn't have versioning", + changeDetectionPolicy.getSource() == Source.VersionId + && changeDetectionPolicy.isRequireVersion()); + rootPath = path("ITestS3SelectMRJob"); Path workingDir = path("working"); fs.setWorkingDirectory(workingDir); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/mapreduce/filecache/TestS3AResourceScope.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/mapreduce/filecache/TestS3AResourceScope.java index c9b1ddc97e..172f79e09a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/mapreduce/filecache/TestS3AResourceScope.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/mapreduce/filecache/TestS3AResourceScope.java @@ -48,7 +48,7 @@ public void testS3AFilesArePrivate() throws Throwable { @Test public void testS3AFilesArePrivateOtherContstructor() throws Throwable { - S3AFileStatus status = new S3AFileStatus(0, 0, PATH, 1, "self"); + S3AFileStatus status = new S3AFileStatus(0, 0, PATH, 1, "self", null, null); assertTrue("Not encrypted: " + status, status.isEncrypted()); assertNotExecutable(status); }
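The tests above fix the request-level contract of change detection: a stored eTag becomes a matching-ETag constraint (a mismatch makes S3 answer 412 Precondition Failed, surfaced as RemoteFileChangedException), while a stored versionId selects one exact object version. A minimal sketch of that mapping, assuming the AWS SDK for Java v1 types used throughout this patch; the class and method names below are illustrative and not part of the patch:

import com.amazonaws.services.s3.model.CopyObjectRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;

/** Illustrative only: how a known revision pins a read or a copy. */
public final class RevisionConstraintSketch {

  private RevisionConstraintSketch() {
  }

  /**
   * Pin a read: a versionId fetches the exact version directly, while an
   * eTag constraint makes S3 fail the GET with 412 on a mismatch.
   */
  public static GetObjectRequest constrainRead(String bucket, String key,
      String eTag, String versionId) {
    GetObjectRequest request = new GetObjectRequest(bucket, key);
    if (versionId != null) {
      // Source.VersionId: read exactly this object version
      request.setVersionId(versionId);
    } else if (eTag != null) {
      // Source.ETag: server rejects the read if the ETag changed
      request.withMatchingETagConstraint(eTag);
    }
    return request;
  }

  /**
   * Pin a server-side copy the same way, matching what the
   * CopyObjectRequest tests above expect from maybeApplyConstraint().
   */
  public static CopyObjectRequest constrainCopy(String bucket, String srcKey,
      String destKey, String eTag, String versionId) {
    CopyObjectRequest request =
        new CopyObjectRequest(bucket, srcKey, bucket, destKey);
    if (versionId != null) {
      // copy from exactly this source version
      request.setSourceVersionId(versionId);
    } else if (eTag != null) {
      // server rejects the copy if the source ETag changed
      request.withMatchingETagConstraint(eTag);
    }
    return request;
  }
}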