HADOOP-16085. S3Guard: use object version or etags to protect against inconsistent read after replace/overwrite.

Contributed by Ben Roling.

S3Guard will now track the etag of uploaded files and, if an S3
bucket is versioned, the object version.

You can then control how to react to a mismatch between the data
in the DynamoDB table and that in the store: warn, fail, or, when
using versions, return the original value.

This adds two new columns to the table: etag and version.
This is transparent to older S3A clients -but when such clients
add/update data to the S3Guard table, they will not add these values.
As a result, the etag/version checks will not work with files uploaded by older clients.

For a consistent experience, upgrade all clients to use the latest hadoop version.
This commit is contained in:
Ben Roling 2019-05-19 22:29:54 +01:00 committed by Steve Loughran
parent 729ccb2cab
commit a36274d699
No known key found for this signature in database
GPG Key ID: D22CF846DBB162A0
56 changed files with 3333 additions and 465 deletions

View File

@ -1904,15 +1904,15 @@
<name>fs.s3a.change.detection.mode</name>
<value>server</value>
<description>
Determines how change detection is applied to alert to S3 objects
rewritten while being read. Value 'server' indicates to apply the attribute
constraint directly on GetObject requests to S3. Value 'client' means to do a
client-side comparison of the attribute value returned in the response. Value
'server' would not work with third-party S3 implementations that do not
support these constraints on GetObject. Values 'server' and 'client' generate
RemoteObjectChangedException when a mismatch is detected. Value 'warn' works
like 'client' but generates only a warning. Value 'none' will ignore change
detection completely.
Determines how change detection is applied to alert to inconsistent S3
objects read during or after an overwrite. Value 'server' indicates to apply
the attribute constraint directly on GetObject requests to S3. Value 'client'
means to do a client-side comparison of the attribute value returned in the
response. Value 'server' would not work with third-party S3 implementations
that do not support these constraints on GetObject. Values 'server' and
'client' generate RemoteObjectChangedException when a mismatch is detected.
Value 'warn' works like 'client' but generates only a warning. Value 'none'
will ignore change detection completely.
</description>
</property>

View File

@ -406,6 +406,11 @@
<artifactId>hadoop-common</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>

View File

@ -197,6 +197,33 @@ public void retry(String action,
});
}
/**
* Execute a void operation with retry processing when doRetry=true, else
* just once.
* @param doRetry true if retries should be performed
* @param action action to execute (used in error messages)
* @param path path of work (used in error messages)
* @param idempotent does the operation have semantics
* which mean that it can be retried even if was already executed?
* @param retrying callback on retries
* @param operation operation to execute
* @throws IOException any IOE raised, or translated exception
*/
@Retries.RetryTranslated
public void maybeRetry(boolean doRetry,
String action,
String path,
boolean idempotent,
Retried retrying,
VoidOperation operation)
throws IOException {
maybeRetry(doRetry, action, path, idempotent, retrying,
() -> {
operation.execute();
return null;
});
}
/**
* Execute a void operation with the default retry callback invoked.
* @param action action to execute (used in error messages)
@ -215,6 +242,28 @@ public void retry(String action,
retry(action, path, idempotent, retryCallback, operation);
}
/**
* Execute a void operation with the default retry callback invoked when
* doRetry=true, else just once.
* @param doRetry true if retries should be performed
* @param action action to execute (used in error messages)
* @param path path of work (used in error messages)
* @param idempotent does the operation have semantics
* which mean that it can be retried even if was already executed?
* @param operation operation to execute
* @throws IOException any IOE raised, or translated exception
*/
@Retries.RetryTranslated
public void maybeRetry(
boolean doRetry,
String action,
String path,
boolean idempotent,
VoidOperation operation)
throws IOException {
maybeRetry(doRetry, action, path, idempotent, retryCallback, operation);
}
/**
* Execute a function with the default retry callback invoked.
* @param action action to execute (used in error messages)
@ -265,6 +314,41 @@ public <T> T retry(
() -> once(action, path, operation));
}
/**
* Execute a function with retry processing when doRetry=true, else just once.
* Uses {@link #once(String, String, Operation)} as the inner
* invocation mechanism before retry logic is performed.
* @param <T> type of return value
* @param doRetry true if retries should be performed
* @param action action to execute (used in error messages)
* @param path path of work (used in error messages)
* @param idempotent does the operation have semantics
* which mean that it can be retried even if was already executed?
* @param retrying callback on retries
* @param operation operation to execute
* @return the result of the call
* @throws IOException any IOE raised, or translated exception
*/
@Retries.RetryTranslated
public <T> T maybeRetry(
boolean doRetry,
String action,
@Nullable String path,
boolean idempotent,
Retried retrying,
Operation<T> operation)
throws IOException {
if (doRetry) {
return retryUntranslated(
toDescription(action, path),
idempotent,
retrying,
() -> once(action, path, operation));
} else {
return once(action, path, operation);
}
}
/**
* Execute a function with retry processing and no translation.
* and the default retry callback.

View File

@ -33,10 +33,11 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
@ -68,7 +69,7 @@ public Listing(S3AFileSystem owner) {
* @return the file status iterator
*/
ProvidedFileStatusIterator createProvidedFileStatusIterator(
FileStatus[] fileStatuses,
S3AFileStatus[] fileStatuses,
PathFilter filter,
FileStatusAcceptor acceptor) {
return new ProvidedFileStatusIterator(fileStatuses, filter, acceptor);
@ -114,7 +115,7 @@ FileStatusListingIterator createFileStatusListingIterator(
S3ListRequest request,
PathFilter filter,
Listing.FileStatusAcceptor acceptor,
RemoteIterator<FileStatus> providedStatus) throws IOException {
RemoteIterator<S3AFileStatus> providedStatus) throws IOException {
return new FileStatusListingIterator(
new ObjectListingIterator(listPath, request),
filter,
@ -129,7 +130,7 @@ FileStatusListingIterator createFileStatusListingIterator(
*/
@VisibleForTesting
LocatedFileStatusIterator createLocatedFileStatusIterator(
RemoteIterator<FileStatus> statusIterator) {
RemoteIterator<S3AFileStatus> statusIterator) {
return new LocatedFileStatusIterator(statusIterator);
}
@ -143,7 +144,7 @@ LocatedFileStatusIterator createLocatedFileStatusIterator(
*/
@VisibleForTesting
TombstoneReconcilingIterator createTombstoneReconcilingIterator(
RemoteIterator<LocatedFileStatus> iterator, Set<Path> tombstones) {
RemoteIterator<S3ALocatedFileStatus> iterator, Set<Path> tombstones) {
return new TombstoneReconcilingIterator(iterator, tombstones);
}
@ -189,19 +190,19 @@ interface FileStatusAcceptor {
* iterator returned.
*/
static final class SingleStatusRemoteIterator
implements RemoteIterator<LocatedFileStatus> {
implements RemoteIterator<S3ALocatedFileStatus> {
/**
* The status to return; set to null after the first iteration.
*/
private LocatedFileStatus status;
private S3ALocatedFileStatus status;
/**
* Constructor.
* @param status status value: may be null, in which case
* the iterator is empty.
*/
public SingleStatusRemoteIterator(LocatedFileStatus status) {
SingleStatusRemoteIterator(S3ALocatedFileStatus status) {
this.status = status;
}
@ -226,9 +227,9 @@ public boolean hasNext() throws IOException {
* to the constructor.
*/
@Override
public LocatedFileStatus next() throws IOException {
public S3ALocatedFileStatus next() throws IOException {
if (hasNext()) {
LocatedFileStatus s = this.status;
S3ALocatedFileStatus s = this.status;
status = null;
return s;
} else {
@ -247,16 +248,16 @@ public LocatedFileStatus next() throws IOException {
* There is no remote data to fetch.
*/
static class ProvidedFileStatusIterator
implements RemoteIterator<FileStatus> {
private final ArrayList<FileStatus> filteredStatusList;
implements RemoteIterator<S3AFileStatus> {
private final ArrayList<S3AFileStatus> filteredStatusList;
private int index = 0;
ProvidedFileStatusIterator(FileStatus[] fileStatuses, PathFilter filter,
ProvidedFileStatusIterator(S3AFileStatus[] fileStatuses, PathFilter filter,
FileStatusAcceptor acceptor) {
Preconditions.checkArgument(fileStatuses != null, "Null status list!");
filteredStatusList = new ArrayList<>(fileStatuses.length);
for (FileStatus status : fileStatuses) {
for (S3AFileStatus status : fileStatuses) {
if (filter.accept(status.getPath()) && acceptor.accept(status)) {
filteredStatusList.add(status);
}
@ -270,7 +271,7 @@ public boolean hasNext() throws IOException {
}
@Override
public FileStatus next() throws IOException {
public S3AFileStatus next() throws IOException {
if (!hasNext()) {
throw new NoSuchElementException();
}
@ -305,7 +306,7 @@ public FileStatus next() throws IOException {
* Thread safety: None.
*/
class FileStatusListingIterator
implements RemoteIterator<FileStatus> {
implements RemoteIterator<S3AFileStatus> {
/** Source of objects. */
private final ObjectListingIterator source;
@ -316,10 +317,10 @@ class FileStatusListingIterator
/** request batch size. */
private int batchSize;
/** Iterator over the current set of results. */
private ListIterator<FileStatus> statusBatchIterator;
private ListIterator<S3AFileStatus> statusBatchIterator;
private final Set<FileStatus> providedStatus;
private Iterator<FileStatus> providedStatusIterator;
private final Map<Path, S3AFileStatus> providedStatus;
private Iterator<S3AFileStatus> providedStatusIterator;
/**
* Create an iterator over file status entries.
@ -335,15 +336,16 @@ class FileStatusListingIterator
FileStatusListingIterator(ObjectListingIterator source,
PathFilter filter,
FileStatusAcceptor acceptor,
RemoteIterator<FileStatus> providedStatus) throws IOException {
RemoteIterator<S3AFileStatus> providedStatus) throws IOException {
this.source = source;
this.filter = filter;
this.acceptor = acceptor;
this.providedStatus = new HashSet<>();
this.providedStatus = new HashMap<>();
for (; providedStatus != null && providedStatus.hasNext();) {
final FileStatus status = providedStatus.next();
if (filter.accept(status.getPath()) && acceptor.accept(status)) {
this.providedStatus.add(status);
final S3AFileStatus status = providedStatus.next();
Path path = status.getPath();
if (filter.accept(path) && acceptor.accept(status)) {
this.providedStatus.put(path, status);
}
}
// build the first set of results. This will not trigger any
@ -376,7 +378,7 @@ private boolean sourceHasNext() throws IOException {
// turn to file status that are only in provided list
if (providedStatusIterator == null) {
LOG.debug("Start iterating the provided status.");
providedStatusIterator = providedStatus.iterator();
providedStatusIterator = providedStatus.values().iterator();
}
return false;
}
@ -384,14 +386,21 @@ private boolean sourceHasNext() throws IOException {
@Override
@Retries.RetryTranslated
public FileStatus next() throws IOException {
final FileStatus status;
public S3AFileStatus next() throws IOException {
final S3AFileStatus status;
if (sourceHasNext()) {
status = statusBatchIterator.next();
// We remove from provided list the file status listed by S3 so that
// We remove from provided map the file status listed by S3 so that
// this does not return duplicate items.
if (providedStatus.remove(status)) {
LOG.debug("Removed the status from provided file status {}", status);
// The provided status is returned as it is assumed to have the better
// metadata (i.e. the eTag and versionId from S3Guard)
S3AFileStatus provided = providedStatus.remove(status.getPath());
if (provided != null) {
LOG.debug(
"Removed and returned the status from provided file status {}",
status);
return provided;
}
} else {
if (providedStatusIterator.hasNext()) {
@ -441,7 +450,7 @@ private boolean buildNextStatusBatch(S3ListResult objects) {
// counters for debug logs
int added = 0, ignored = 0;
// list to fill in with results. Initial size will be list maximum.
List<FileStatus> stats = new ArrayList<>(
List<S3AFileStatus> stats = new ArrayList<>(
objects.getObjectSummaries().size() +
objects.getCommonPrefixes().size());
// objects
@ -453,8 +462,9 @@ private boolean buildNextStatusBatch(S3ListResult objects) {
}
// Skip over keys that are ourselves and old S3N _$folder$ files
if (acceptor.accept(keyPath, summary) && filter.accept(keyPath)) {
FileStatus status = createFileStatus(keyPath, summary,
owner.getDefaultBlockSize(keyPath), owner.getUsername());
S3AFileStatus status = createFileStatus(keyPath, summary,
owner.getDefaultBlockSize(keyPath), owner.getUsername(),
null, null);
LOG.debug("Adding: {}", status);
stats.add(status);
added++;
@ -468,7 +478,7 @@ private boolean buildNextStatusBatch(S3ListResult objects) {
for (String prefix : objects.getCommonPrefixes()) {
Path keyPath = owner.keyToQualifiedPath(prefix);
if (acceptor.accept(keyPath, prefix) && filter.accept(keyPath)) {
FileStatus status = new S3AFileStatus(Tristate.FALSE, keyPath,
S3AFileStatus status = new S3AFileStatus(Tristate.FALSE, keyPath,
owner.getUsername());
LOG.debug("Adding directory: {}", status);
added++;
@ -679,14 +689,14 @@ public boolean accept(FileStatus status) {
* return a remote iterator of {@link LocatedFileStatus} instances.
*/
class LocatedFileStatusIterator
implements RemoteIterator<LocatedFileStatus> {
private final RemoteIterator<FileStatus> statusIterator;
implements RemoteIterator<S3ALocatedFileStatus> {
private final RemoteIterator<S3AFileStatus> statusIterator;
/**
* Constructor.
* @param statusIterator an iterator over the remote status entries
*/
LocatedFileStatusIterator(RemoteIterator<FileStatus> statusIterator) {
LocatedFileStatusIterator(RemoteIterator<S3AFileStatus> statusIterator) {
this.statusIterator = statusIterator;
}
@ -696,7 +706,7 @@ public boolean hasNext() throws IOException {
}
@Override
public LocatedFileStatus next() throws IOException {
public S3ALocatedFileStatus next() throws IOException {
return owner.toLocatedFileStatus(statusIterator.next());
}
}
@ -708,16 +718,16 @@ public LocatedFileStatus next() throws IOException {
* remain in the source iterator.
*/
static class TombstoneReconcilingIterator implements
RemoteIterator<LocatedFileStatus> {
private LocatedFileStatus next = null;
private final RemoteIterator<LocatedFileStatus> iterator;
RemoteIterator<S3ALocatedFileStatus> {
private S3ALocatedFileStatus next = null;
private final RemoteIterator<S3ALocatedFileStatus> iterator;
private final Set<Path> tombstones;
/**
* @param iterator Source iterator to filter
* @param tombstones set of tombstone markers to filter out of results
*/
TombstoneReconcilingIterator(RemoteIterator<LocatedFileStatus>
TombstoneReconcilingIterator(RemoteIterator<S3ALocatedFileStatus>
iterator, Set<Path> tombstones) {
this.iterator = iterator;
if (tombstones != null) {
@ -729,7 +739,7 @@ static class TombstoneReconcilingIterator implements
private boolean fetch() throws IOException {
while (next == null && iterator.hasNext()) {
LocatedFileStatus candidate = iterator.next();
S3ALocatedFileStatus candidate = iterator.next();
if (!tombstones.contains(candidate.getPath())) {
next = candidate;
return true;
@ -745,9 +755,9 @@ public boolean hasNext() throws IOException {
return fetch();
}
public LocatedFileStatus next() throws IOException {
public S3ALocatedFileStatus next() throws IOException {
if (hasNext()) {
LocatedFileStatus result = next;
S3ALocatedFileStatus result = next;
next = null;
fetch();
return result;

View File

@ -32,6 +32,9 @@
@InterfaceStability.Unstable
public class RemoteFileChangedException extends PathIOException {
public static final String PRECONDITIONS_FAILED =
"Constraints of request were unsatisfiable";
/**
* Constructs a RemoteFileChangedException.
*
@ -46,4 +49,21 @@ public RemoteFileChangedException(String path,
super(path, message);
setOperation(operation);
}
/**
* Constructs a RemoteFileChangedException.
*
* @param path the path accessed when the change was detected
* @param operation the operation (e.g. open, re-open) performed when the
* change was detected
* @param message a message providing more details about the condition
* @param cause inner cause.
*/
public RemoteFileChangedException(String path,
String operation,
String message,
Throwable cause) {
super(path, message, cause);
setOperation(operation);
}
}

View File

@ -32,6 +32,8 @@
@InterfaceStability.Evolving
public class S3AFileStatus extends FileStatus {
private Tristate isEmptyDirectory;
private String eTag;
private String versionId;
/**
* Create a directory status.
@ -69,15 +71,17 @@ public S3AFileStatus(Tristate isemptydir,
* @param path path
* @param blockSize block size
* @param owner owner
* @param eTag eTag of the S3 object if available, else null
* @param versionId versionId of the S3 object if available, else null
*/
public S3AFileStatus(long length, long modification_time, Path path,
long blockSize, String owner) {
super(length, false, 1, blockSize, modification_time, 0,
null, null, null, null,
long blockSize, String owner, String eTag, String versionId) {
super(length, false, 1, blockSize, modification_time,
0, null, owner, owner, null,
path, false, true, false);
isEmptyDirectory = Tristate.FALSE;
setOwner(owner);
setGroup(owner);
this.eTag = eTag;
this.versionId = versionId;
}
/**
@ -86,16 +90,19 @@ public S3AFileStatus(long length, long modification_time, Path path,
* @param source FileStatus to convert to S3AFileStatus
* @param isEmptyDirectory TRUE/FALSE if known to be / not be an empty
* directory, UNKNOWN if that information was not computed.
* @param eTag eTag of the S3 object if available, else null
* @param versionId versionId of the S3 object if available, else null
* @return a new S3AFileStatus
*/
public static S3AFileStatus fromFileStatus(FileStatus source,
Tristate isEmptyDirectory) {
Tristate isEmptyDirectory, String eTag, String versionId) {
if (source.isDirectory()) {
return new S3AFileStatus(isEmptyDirectory, source.getPath(),
source.getOwner());
} else {
return new S3AFileStatus(source.getLen(), source.getModificationTime(),
source.getPath(), source.getBlockSize(), source.getOwner());
source.getPath(), source.getBlockSize(), source.getOwner(),
eTag, versionId);
}
}
@ -109,6 +116,28 @@ public Tristate isEmptyDirectory() {
return isEmptyDirectory;
}
/**
* Update isEmptyDirectory attribute.
* @param isEmptyDirectory new isEmptyDirectory value
*/
public void setIsEmptyDirectory(Tristate isEmptyDirectory) {
this.isEmptyDirectory = isEmptyDirectory;
}
/**
* @return the S3 object eTag when available, else null.
*/
public String getETag() {
return eTag;
}
/**
* @return the S3 object versionId when available, else null.
*/
public String getVersionId() {
return versionId;
}
/** Compare if this object is equal to another object.
* @param o the object to be compared.
* @return true if two file status has the same path name; false if not.
@ -150,8 +179,10 @@ public long getModificationTime(){
@Override
public String toString() {
return super.toString() +
String.format(" isEmptyDirectory=%s", isEmptyDirectory().name());
return super.toString()
+ String.format(" isEmptyDirectory=%s", isEmptyDirectory().name()
+ String.format(" eTag=%s", eTag)
+ String.format(" versionId=%s", versionId));
}
}

View File

@ -49,6 +49,7 @@
import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.SdkBaseException;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.AbortMultipartUploadRequest;
import com.amazonaws.services.s3.model.CannedAccessControlList;
@ -74,6 +75,7 @@
import com.amazonaws.services.s3.transfer.TransferManager;
import com.amazonaws.services.s3.transfer.TransferManagerConfiguration;
import com.amazonaws.services.s3.transfer.Upload;
import com.amazonaws.services.s3.transfer.model.CopyResult;
import com.amazonaws.services.s3.transfer.model.UploadResult;
import com.amazonaws.event.ProgressListener;
import com.google.common.annotations.VisibleForTesting;
@ -89,6 +91,7 @@
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy;
import org.apache.hadoop.fs.s3a.impl.CopyOutcome;
import org.apache.hadoop.fs.s3a.select.InternalSelectConstants;
import org.apache.hadoop.util.LambdaUtils;
import org.apache.hadoop.fs.FileAlreadyExistsException;
@ -115,6 +118,7 @@
import org.apache.hadoop.fs.s3a.commit.CommitConstants;
import org.apache.hadoop.fs.s3a.commit.PutTracker;
import org.apache.hadoop.fs.s3a.commit.MagicCommitIntegration;
import org.apache.hadoop.fs.s3a.impl.ChangeTracker;
import org.apache.hadoop.fs.s3a.select.SelectBinding;
import org.apache.hadoop.fs.s3a.select.SelectConstants;
import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
@ -324,26 +328,7 @@ public void initialize(URI name, Configuration originalConf)
readAhead = longBytesOption(conf, READAHEAD_RANGE,
DEFAULT_READAHEAD_RANGE, 0);
int maxThreads = conf.getInt(MAX_THREADS, DEFAULT_MAX_THREADS);
if (maxThreads < 2) {
LOG.warn(MAX_THREADS + " must be at least 2: forcing to 2.");
maxThreads = 2;
}
int totalTasks = intOption(conf,
MAX_TOTAL_TASKS, DEFAULT_MAX_TOTAL_TASKS, 1);
long keepAliveTime = longOption(conf, KEEPALIVE_TIME,
DEFAULT_KEEPALIVE_TIME, 0);
boundedThreadPool = BlockingThreadPoolExecutorService.newInstance(
maxThreads,
maxThreads + totalTasks,
keepAliveTime, TimeUnit.SECONDS,
"s3a-transfer-shared");
unboundedThreadPool = new ThreadPoolExecutor(
maxThreads, Integer.MAX_VALUE,
keepAliveTime, TimeUnit.SECONDS,
new LinkedBlockingQueue<Runnable>(),
BlockingThreadPoolExecutorService.newDaemonThreadFactory(
"s3a-transfer-unbounded"));
initThreadPools(conf);
int listVersion = conf.getInt(LIST_VERSION, DEFAULT_LIST_VERSION);
if (listVersion < 1 || listVersion > 2) {
@ -412,6 +397,29 @@ public void initialize(URI name, Configuration originalConf)
}
private void initThreadPools(Configuration conf) {
int maxThreads = conf.getInt(MAX_THREADS, DEFAULT_MAX_THREADS);
if (maxThreads < 2) {
LOG.warn(MAX_THREADS + " must be at least 2: forcing to 2.");
maxThreads = 2;
}
int totalTasks = intOption(conf,
MAX_TOTAL_TASKS, DEFAULT_MAX_TOTAL_TASKS, 1);
long keepAliveTime = longOption(conf, KEEPALIVE_TIME,
DEFAULT_KEEPALIVE_TIME, 0);
boundedThreadPool = BlockingThreadPoolExecutorService.newInstance(
maxThreads,
maxThreads + totalTasks,
keepAliveTime, TimeUnit.SECONDS,
"s3a-transfer-shared");
unboundedThreadPool = new ThreadPoolExecutor(
maxThreads, Integer.MAX_VALUE,
keepAliveTime, TimeUnit.SECONDS,
new LinkedBlockingQueue<Runnable>(),
BlockingThreadPoolExecutorService.newDaemonThreadFactory(
"s3a-transfer-unbounded"));
}
/**
* Create the storage statistics or bind to an existing one.
* @return a storage statistics instance.
@ -652,6 +660,13 @@ protected void setAmazonS3Client(AmazonS3 client) {
Preconditions.checkNotNull(client, "client");
LOG.debug("Setting S3 client to {}", client);
s3 = client;
// Need to use a new TransferManager that uses the new client.
// Also, using a new TransferManager requires a new threadpool as the old
// TransferManager will shut the thread pool down when it is garbage
// collected.
initThreadPools(getConf());
initTransferManager();
}
/**
@ -697,10 +712,11 @@ public S3AInputPolicy getInputPolicy() {
/**
* Get the change detection policy for this FS instance.
* Only public to allow access in tests in other packages.
* @return the change detection policy
*/
@VisibleForTesting
ChangeDetectionPolicy getChangeDetectionPolicy() {
public ChangeDetectionPolicy getChangeDetectionPolicy() {
return changeDetectionPolicy;
}
@ -877,7 +893,7 @@ private FSDataInputStream open(
throws IOException {
entryPoint(INVOCATION_OPEN);
final FileStatus fileStatus = getFileStatus(path);
final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path);
if (fileStatus.isDirectory()) {
throw new FileNotFoundException("Can't open " + path
+ " because it is a directory");
@ -910,7 +926,10 @@ private FSDataInputStream open(
return new FSDataInputStream(
new S3AInputStream(
readContext,
createObjectAttributes(path),
createObjectAttributes(
path,
fileStatus.getETag(),
fileStatus.getVersionId()),
fileStatus.getLen(),
s3));
}
@ -943,13 +962,20 @@ private S3AReadOpContext createReadContext(
/**
* Create the attributes of an object for a get/select request.
* @param f path path of the request.
* @param eTag the eTag of the S3 object
* @param versionId S3 object version ID
* @return attributes to use when building the query.
*/
private S3ObjectAttributes createObjectAttributes(final Path f) {
private S3ObjectAttributes createObjectAttributes(
final Path f,
final String eTag,
final String versionId) {
return new S3ObjectAttributes(bucket,
pathToKey(f),
getServerSideEncryptionAlgorithm(),
encryptionSecrets.getEncryptionKey());
encryptionSecrets.getEncryptionKey(),
eTag,
versionId);
}
/**
@ -1213,19 +1239,27 @@ private boolean innerRename(Path source, Path dest)
if (srcStatus.isFile()) {
LOG.debug("rename: renaming file {} to {}", src, dst);
long length = srcStatus.getLen();
S3ObjectAttributes objectAttributes =
createObjectAttributes(srcStatus.getPath(),
srcStatus.getETag(), srcStatus.getVersionId());
S3AReadOpContext readContext = createReadContext(srcStatus, inputPolicy,
changeDetectionPolicy, readAhead);
if (dstStatus != null && dstStatus.isDirectory()) {
String newDstKey = maybeAddTrailingSlash(dstKey);
String filename =
srcKey.substring(pathToKey(src.getParent()).length()+1);
newDstKey = newDstKey + filename;
copyFile(srcKey, newDstKey, length);
CopyResult copyResult = copyFile(srcKey, newDstKey, length,
objectAttributes, readContext);
S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, src,
keyToQualifiedPath(newDstKey), length, getDefaultBlockSize(dst),
username);
username, copyResult.getETag(), copyResult.getVersionId());
} else {
copyFile(srcKey, dstKey, srcStatus.getLen());
CopyResult copyResult = copyFile(srcKey, dstKey, srcStatus.getLen(),
objectAttributes, readContext);
S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, src, dst,
length, getDefaultBlockSize(dst), username);
length, getDefaultBlockSize(dst), username,
copyResult.getETag(), copyResult.getVersionId());
}
innerDelete(srcStatus, false);
} else {
@ -1248,10 +1282,10 @@ private boolean innerRename(Path source, Path dest)
}
Path parentPath = keyToQualifiedPath(srcKey);
RemoteIterator<LocatedFileStatus> iterator = listFilesAndEmptyDirectories(
parentPath, true);
RemoteIterator<S3ALocatedFileStatus> iterator =
listFilesAndEmptyDirectories(parentPath, true);
while (iterator.hasNext()) {
LocatedFileStatus status = iterator.next();
S3ALocatedFileStatus status = iterator.next();
long length = status.getLen();
String key = pathToKey(status.getPath());
if (status.isDirectory() && !key.endsWith("/")) {
@ -1261,7 +1295,13 @@ private boolean innerRename(Path source, Path dest)
.add(new DeleteObjectsRequest.KeyVersion(key));
String newDstKey =
dstKey + key.substring(srcKey.length());
copyFile(key, newDstKey, length);
S3ObjectAttributes objectAttributes =
createObjectAttributes(status.getPath(),
status.getETag(), status.getVersionId());
S3AReadOpContext readContext = createReadContext(status, inputPolicy,
changeDetectionPolicy, readAhead);
CopyResult copyResult = copyFile(key, newDstKey, length,
objectAttributes, readContext);
if (hasMetadataStore()) {
// with a metadata store, the object entries need to be updated,
@ -1273,7 +1313,8 @@ private boolean innerRename(Path source, Path dest)
childDst, username);
} else {
S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, childSrc,
childDst, length, getDefaultBlockSize(childDst), username);
childDst, length, getDefaultBlockSize(childDst), username,
copyResult.getETag(), copyResult.getVersionId());
}
// Ancestor directories may not be listed, so we explicitly add them
S3Guard.addMoveAncestors(metadataStore, srcPaths, dstMetas,
@ -1319,10 +1360,28 @@ private boolean innerRename(Path source, Path dest)
@VisibleForTesting
@Retries.RetryTranslated
public ObjectMetadata getObjectMetadata(Path path) throws IOException {
return getObjectMetadata(path, null, invoker, null);
}
/**
* Low-level call to get at the object metadata.
* @param path path to the object
* @param changeTracker the change tracker to detect version inconsistencies
* @param changeInvoker the invoker providing the retry policy
* @param operation the operation being performed (e.g. "read" or "copy")
* @return metadata
* @throws IOException IO and object access problems.
*/
@VisibleForTesting
@Retries.RetryTranslated
public ObjectMetadata getObjectMetadata(Path path,
ChangeTracker changeTracker, Invoker changeInvoker, String operation)
throws IOException {
return once("getObjectMetadata", path.toString(),
() ->
// this always does a full HEAD to the object
getObjectMetadata(pathToKey(path)));
getObjectMetadata(
pathToKey(path), changeTracker, changeInvoker, operation));
}
/**
@ -1493,14 +1552,41 @@ public S3AStorageStatistics getStorageStatistics() {
*/
@Retries.RetryRaw
protected ObjectMetadata getObjectMetadata(String key) throws IOException {
return getObjectMetadata(key, null, invoker,null);
}
/**
* Request object metadata; increments counters in the process.
* Retry policy: retry untranslated.
* Uses changeTracker to detect an unexpected file version (eTag or versionId)
* @param key key
* @param changeTracker the change tracker to detect unexpected object version
* @param changeInvoker the invoker providing the retry policy
* @param operation the operation (e.g. "read" or "copy") triggering this call
* @return the metadata
* @throws IOException if the retry invocation raises one (it shouldn't).
* @throws RemoteFileChangedException if an unexpected version is detected
*/
@Retries.RetryRaw
protected ObjectMetadata getObjectMetadata(String key,
ChangeTracker changeTracker,
Invoker changeInvoker,
String operation) throws IOException {
GetObjectMetadataRequest request =
new GetObjectMetadataRequest(bucket, key);
//SSE-C requires to be filled in if enabled for object metadata
generateSSECustomerKey().ifPresent(request::setSSECustomerKey);
ObjectMetadata meta = invoker.retryUntranslated("GET " + key, true,
ObjectMetadata meta = changeInvoker.retryUntranslated("GET " + key, true,
() -> {
incrementStatistic(OBJECT_METADATA_REQUESTS);
return s3.getObjectMetadata(request);
if (changeTracker != null) {
changeTracker.maybeApplyConstraint(request);
}
ObjectMetadata objectMetadata = s3.getObjectMetadata(request);
if (changeTracker != null) {
changeTracker.processMetadata(objectMetadata, operation);
}
return objectMetadata;
});
incrementReadOperations();
return meta;
@ -1802,7 +1888,8 @@ PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest)
PutObjectResult result = s3.putObject(putObjectRequest);
incrementPutCompletedStatistics(true, len);
// update metadata
finishedWrite(putObjectRequest.getKey(), len);
finishedWrite(putObjectRequest.getKey(), len,
result.getETag(), result.getVersionId());
return result;
} catch (AmazonClientException e) {
incrementPutCompletedStatistics(false, len);
@ -2160,7 +2247,7 @@ public FileStatus[] innerListStatus(Path f) throws FileNotFoundException,
LOG.debug("List status for path: {}", path);
entryPoint(INVOCATION_LIST_STATUS);
List<FileStatus> result;
List<S3AFileStatus> result;
final FileStatus fileStatus = getFileStatus(path);
if (fileStatus.isDirectory()) {
@ -2419,11 +2506,11 @@ S3AFileStatus innerGetFileStatus(final Path f,
}
}
FileStatus msStatus = pm.getFileStatus();
S3AFileStatus msStatus = pm.getFileStatus();
if (needEmptyDirectoryFlag && msStatus.isDirectory()) {
if (pm.isEmptyDirectory() != Tristate.UNKNOWN) {
// We have a definitive true / false from MetadataStore, we are done.
return S3AFileStatus.fromFileStatus(msStatus, pm.isEmptyDirectory());
return msStatus;
} else {
DirListingMetadata children =
S3Guard.listChildrenWithTtl(metadataStore, path, ttlTimeProvider);
@ -2434,7 +2521,7 @@ S3AFileStatus innerGetFileStatus(final Path f,
}
} else {
// Either this is not a directory, or we don't care if it is empty
return S3AFileStatus.fromFileStatus(msStatus, pm.isEmptyDirectory());
return msStatus;
}
// If the metadata store has no children for it and it's not listed in
@ -2443,7 +2530,8 @@ S3AFileStatus innerGetFileStatus(final Path f,
try {
s3FileStatus = s3GetFileStatus(path, key, tombstones);
} catch (FileNotFoundException e) {
return S3AFileStatus.fromFileStatus(msStatus, Tristate.TRUE);
return S3AFileStatus.fromFileStatus(msStatus, Tristate.TRUE,
null, null);
}
// entry was found, save in S3Guard
return S3Guard.putAndReturn(metadataStore, s3FileStatus, instrumentation);
@ -2482,7 +2570,9 @@ private S3AFileStatus s3GetFileStatus(final Path path, String key,
dateToLong(meta.getLastModified()),
path,
getDefaultBlockSize(path),
username);
username,
meta.getETag(),
meta.getVersionId());
}
} catch (AmazonServiceException e) {
if (e.getStatusCode() != 404) {
@ -2509,7 +2599,9 @@ private S3AFileStatus s3GetFileStatus(final Path path, String key,
dateToLong(meta.getLastModified()),
path,
getDefaultBlockSize(path),
username);
username,
meta.getETag(),
meta.getVersionId());
}
} catch (AmazonServiceException e) {
if (e.getStatusCode() != 404) {
@ -2734,7 +2826,8 @@ UploadResult executePut(PutObjectRequest putObjectRequest,
UploadResult result = waitForUploadCompletion(key, info);
listener.uploadCompleted();
// post-write actions
finishedWrite(key, info.getLength());
finishedWrite(key, info.getLength(),
result.getETag(), result.getVersionId());
return result;
}
@ -2899,12 +2992,15 @@ public List<RoleModel.Statement> listAWSPolicyRules(
* @param srcKey source object path
* @param dstKey destination object path
* @param size object size
* @throws AmazonClientException on failures inside the AWS SDK
* @param srcAttributes S3 attributes of the source object
* @param readContext the read context
* @return the result of the copy
* @throws InterruptedIOException the operation was interrupted
* @throws IOException Other IO problems
*/
@Retries.RetryMixed
private void copyFile(String srcKey, String dstKey, long size)
@Retries.RetryTranslated
private CopyResult copyFile(String srcKey, String dstKey, long size,
S3ObjectAttributes srcAttributes, S3AReadOpContext readContext)
throws IOException, InterruptedIOException {
LOG.debug("copyFile {} -> {} ", srcKey, dstKey);
@ -2918,26 +3014,58 @@ private void copyFile(String srcKey, String dstKey, long size)
}
};
once("copyFile(" + srcKey + ", " + dstKey + ")", srcKey,
() -> {
ObjectMetadata srcom = getObjectMetadata(srcKey);
ChangeTracker changeTracker = new ChangeTracker(
keyToQualifiedPath(srcKey).toString(),
changeDetectionPolicy,
readContext.instrumentation.newInputStreamStatistics()
.getVersionMismatchCounter(),
srcAttributes);
String action = "copyFile(" + srcKey + ", " + dstKey + ")";
Invoker readInvoker = readContext.getReadInvoker();
ObjectMetadata srcom =
once(action, srcKey,
() ->
getObjectMetadata(srcKey, changeTracker, readInvoker, "copy"));
ObjectMetadata dstom = cloneObjectMetadata(srcom);
setOptionalObjectMetadata(dstom);
return readInvoker.retry(
action, srcKey,
true,
() -> {
CopyObjectRequest copyObjectRequest =
new CopyObjectRequest(bucket, srcKey, bucket, dstKey);
changeTracker.maybeApplyConstraint(copyObjectRequest);
setOptionalCopyObjectRequestParameters(copyObjectRequest);
copyObjectRequest.setCannedAccessControlList(cannedACL);
copyObjectRequest.setNewObjectMetadata(dstom);
Optional.ofNullable(srcom.getStorageClass())
.ifPresent(copyObjectRequest::setStorageClass);
Copy copy = transfers.copy(copyObjectRequest);
copy.addProgressListener(progressListener);
try {
copy.waitForCopyResult();
CopyOutcome copyOutcome = CopyOutcome.waitForCopy(copy);
InterruptedException interruptedException =
copyOutcome.getInterruptedException();
if (interruptedException != null) {
// copy interrupted: convert to an IOException.
throw (IOException)new InterruptedIOException(
"Interrupted copying " + srcKey
+ " to " + dstKey + ", cancelling")
.initCause(interruptedException);
}
SdkBaseException awsException = copyOutcome.getAwsException();
if (awsException != null) {
changeTracker.processException(awsException, "copy");
throw awsException;
}
CopyResult result = copyOutcome.getCopyResult();
changeTracker.processResponse(result);
incrementWriteOperations();
instrumentation.filesCopied(1, size);
} catch (InterruptedException e) {
throw new InterruptedIOException("Interrupted copying " + srcKey
+ " to " + dstKey + ", cancelling");
}
return result;
});
}
@ -3044,6 +3172,8 @@ private Optional<SSECustomerKey> generateSSECustomerKey() {
* </ol>
* @param key key written to
* @param length total length of file written
* @param eTag eTag of the written object
* @param versionId S3 object versionId of the written object
* @throws MetadataPersistenceException if metadata about the write could
* not be saved to the metadata store and
* fs.s3a.metadatastore.fail.on.write.error=true
@ -3051,7 +3181,7 @@ private Optional<SSECustomerKey> generateSSECustomerKey() {
@InterfaceAudience.Private
@Retries.RetryTranslated("Except if failOnMetadataWriteError=false, in which"
+ " case RetryExceptionsSwallowed")
void finishedWrite(String key, long length)
void finishedWrite(String key, long length, String eTag, String versionId)
throws MetadataPersistenceException {
LOG.debug("Finished write to {}, len {}", key, length);
Path p = keyToQualifiedPath(key);
@ -3064,7 +3194,7 @@ void finishedWrite(String key, long length)
S3Guard.addAncestors(metadataStore, p, username);
S3AFileStatus status = createUploadFileStatus(p,
S3AUtils.objectRepresentsDirectory(key, length), length,
getDefaultBlockSize(p), username);
getDefaultBlockSize(p), username, eTag, versionId);
S3Guard.putAndReturn(metadataStore, status, instrumentation);
}
} catch (IOException e) {
@ -3436,26 +3566,41 @@ public EtagChecksum getFileChecksum(Path f, final long length)
@Retries.OnceTranslated
public RemoteIterator<LocatedFileStatus> listFiles(Path f,
boolean recursive) throws FileNotFoundException, IOException {
return innerListFiles(f, recursive,
new Listing.AcceptFilesOnly(qualify(f)));
return toLocatedFileStatusIterator(innerListFiles(f, recursive,
new Listing.AcceptFilesOnly(qualify(f))));
}
private static RemoteIterator<LocatedFileStatus> toLocatedFileStatusIterator(
RemoteIterator<? extends LocatedFileStatus> iterator) {
return new RemoteIterator<LocatedFileStatus>() {
@Override
public boolean hasNext() throws IOException {
return iterator.hasNext();
}
@Override
public LocatedFileStatus next() throws IOException {
return iterator.next();
}
};
}
@Retries.OnceTranslated
public RemoteIterator<LocatedFileStatus> listFilesAndEmptyDirectories(Path f,
boolean recursive) throws IOException {
public RemoteIterator<S3ALocatedFileStatus> listFilesAndEmptyDirectories(
Path f, boolean recursive) throws IOException {
return innerListFiles(f, recursive,
new Listing.AcceptAllButS3nDirs());
}
@Retries.OnceTranslated
private RemoteIterator<LocatedFileStatus> innerListFiles(Path f, boolean
private RemoteIterator<S3ALocatedFileStatus> innerListFiles(Path f, boolean
recursive, Listing.FileStatusAcceptor acceptor) throws IOException {
entryPoint(INVOCATION_LIST_FILES);
Path path = qualify(f);
LOG.debug("listFiles({}, {})", path, recursive);
try {
// lookup dir triggers existence check
final FileStatus fileStatus = getFileStatus(path);
final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path);
if (fileStatus.isFile()) {
// simple case: File
LOG.debug("Path is a file");
@ -3467,7 +3612,7 @@ private RemoteIterator<LocatedFileStatus> innerListFiles(Path f, boolean
String delimiter = recursive ? null : "/";
LOG.debug("Requesting all entries under {} with delimiter '{}'",
key, delimiter);
final RemoteIterator<FileStatus> cachedFilesIterator;
final RemoteIterator<S3AFileStatus> cachedFilesIterator;
final Set<Path> tombstones;
if (recursive) {
final PathMetadata pm = metadataStore.get(path, true);
@ -3539,10 +3684,12 @@ public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
entryPoint(INVOCATION_LIST_LOCATED_STATUS);
Path path = qualify(f);
LOG.debug("listLocatedStatus({}, {}", path, filter);
return once("listLocatedStatus", path.toString(),
RemoteIterator<? extends LocatedFileStatus> iterator =
once("listLocatedStatus", path.toString(),
() -> {
// lookup dir triggers existence check
final FileStatus fileStatus = getFileStatus(path);
final S3AFileStatus fileStatus =
(S3AFileStatus) getFileStatus(path);
if (fileStatus.isFile()) {
// simple case: File
LOG.debug("Path is a file");
@ -3556,7 +3703,7 @@ public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
DirListingMetadata meta =
S3Guard.listChildrenWithTtl(metadataStore, path,
ttlTimeProvider);
final RemoteIterator<FileStatus> cachedFileStatusIterator =
final RemoteIterator<S3AFileStatus> cachedFileStatusIterator =
listing.createProvidedFileStatusIterator(
S3Guard.dirMetaToStatuses(meta), filter, acceptor);
return (allowAuthoritative && meta != null
@ -3571,20 +3718,21 @@ public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
cachedFileStatusIterator));
}
});
return toLocatedFileStatusIterator(iterator);
}
/**
* Build a {@link LocatedFileStatus} from a {@link FileStatus} instance.
* Build a {@link S3ALocatedFileStatus} from a {@link FileStatus} instance.
* @param status file status
* @return a located status with block locations set up from this FS.
* @throws IOException IO Problems.
*/
LocatedFileStatus toLocatedFileStatus(FileStatus status)
S3ALocatedFileStatus toLocatedFileStatus(S3AFileStatus status)
throws IOException {
return new LocatedFileStatus(status,
return new S3ALocatedFileStatus(status,
status.isFile() ?
getFileBlockLocations(status, 0, status.getLen())
: null);
: null, status.getETag(), status.getVersionId());
}
/**
@ -3743,17 +3891,38 @@ private FSDataInputStream select(final Path source,
// so the operation will fail if it is not there or S3Guard believes it has
// been deleted.
// validation of the file status are delegated to the binding.
final FileStatus fileStatus = getFileStatus(path);
final S3AFileStatus fileStatus = (S3AFileStatus) getFileStatus(path);
// readahead range can be dynamically set
long ra = options.getLong(READAHEAD_RANGE, readAhead);
S3ObjectAttributes objectAttributes = createObjectAttributes(
path, fileStatus.getETag(), fileStatus.getVersionId());
S3AReadOpContext readContext = createReadContext(fileStatus, inputPolicy,
changeDetectionPolicy, ra);
if (!fileStatus.isDirectory()) {
// check that the object metadata lines up with what is expected
// based on the object attributes (which may contain an eTag or
// versionId) from S3Guard
ChangeTracker changeTracker =
new ChangeTracker(uri.toString(),
changeDetectionPolicy,
readContext.instrumentation.newInputStreamStatistics()
.getVersionMismatchCounter(),
objectAttributes);
// will retry internally if wrong version detected
Invoker readInvoker = readContext.getReadInvoker();
getObjectMetadata(path, changeTracker, readInvoker, "select");
}
// build and execute the request
return selectBinding.select(
createReadContext(fileStatus, inputPolicy, changeDetectionPolicy, ra),
readContext,
expression,
options,
generateSSECustomerKey(),
createObjectAttributes(path));
objectAttributes);
}
/**

View File

@ -153,7 +153,8 @@ public S3AInputStream(S3AReadOpContext ctx,
this.serverSideEncryptionKey = s3Attributes.getServerSideEncryptionKey();
this.changeTracker = new ChangeTracker(uri,
ctx.getChangeDetectionPolicy(),
streamStatistics.getVersionMismatchCounter());
streamStatistics.getVersionMismatchCounter(),
s3Attributes);
setInputPolicy(ctx.getInputPolicy());
setReadahead(ctx.getReadahead());
}
@ -344,9 +345,12 @@ public boolean seekToNewSource(long targetPos) throws IOException {
private void lazySeek(long targetPos, long len) throws IOException {
// With S3Guard, the metadatastore gave us metadata for the file in
// open(), so we use a slightly different retry policy.
// open(), so we use a slightly different retry policy, but only on initial
// open. After that, an exception generally means the file has changed
// and there is no point retrying anymore.
Invoker invoker = context.getReadInvoker();
invoker.retry("lazySeek", pathStr, true,
invoker.maybeRetry(streamStatistics.openOperations == 0,
"lazySeek", pathStr, true,
() -> {
//For lazy seek
seekInStream(targetPos, len);

View File

@ -0,0 +1,63 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.LocatedFileStatus;
import static com.google.common.base.Preconditions.checkNotNull;
/**
* {@link LocatedFileStatus} extended to also carry ETag and object version ID.
*/
public class S3ALocatedFileStatus extends LocatedFileStatus {
private static final long serialVersionUID = 3597192103662929338L;
private final String eTag;
private final String versionId;
public S3ALocatedFileStatus(S3AFileStatus status, BlockLocation[] locations,
String eTag, String versionId) {
super(checkNotNull(status), locations);
this.eTag = eTag;
this.versionId = versionId;
}
public String getETag() {
return eTag;
}
public String getVersionId() {
return versionId;
}
// equals() and hashCode() overridden to avoid FindBugs warning.
// Base implementation is equality on Path only, which is still appropriate.
@Override
public boolean equals(Object o) {
return super.equals(o);
}
@Override
public int hashCode() {
return super.hashCode();
}
}

View File

@ -533,16 +533,20 @@ public static String stringify(AmazonS3Exception e) {
* @param summary summary from AWS
* @param blockSize block size to declare.
* @param owner owner of the file
* @param eTag S3 object eTag or null if unavailable
* @param versionId S3 object versionId or null if unavailable
* @return a status entry
*/
public static S3AFileStatus createFileStatus(Path keyPath,
S3ObjectSummary summary,
long blockSize,
String owner) {
String owner,
String eTag,
String versionId) {
long size = summary.getSize();
return createFileStatus(keyPath,
objectRepresentsDirectory(summary.getKey(), size),
size, summary.getLastModified(), blockSize, owner);
size, summary.getLastModified(), blockSize, owner, eTag, versionId);
}
/**
@ -555,22 +559,27 @@ public static S3AFileStatus createFileStatus(Path keyPath,
* @param size file length
* @param blockSize block size for file status
* @param owner Hadoop username
* @param eTag S3 object eTag or null if unavailable
* @param versionId S3 object versionId or null if unavailable
* @return a status entry
*/
public static S3AFileStatus createUploadFileStatus(Path keyPath,
boolean isDir, long size, long blockSize, String owner) {
boolean isDir, long size, long blockSize, String owner,
String eTag, String versionId) {
Date date = isDir ? null : new Date();
return createFileStatus(keyPath, isDir, size, date, blockSize, owner);
return createFileStatus(keyPath, isDir, size, date, blockSize, owner,
eTag, versionId);
}
/* Date 'modified' is ignored when isDir is true. */
private static S3AFileStatus createFileStatus(Path keyPath, boolean isDir,
long size, Date modified, long blockSize, String owner) {
long size, Date modified, long blockSize, String owner,
String eTag, String versionId) {
if (isDir) {
return new S3AFileStatus(Tristate.UNKNOWN, keyPath, owner);
} else {
return new S3AFileStatus(size, dateToLong(modified), keyPath, blockSize,
owner);
owner, eTag, versionId);
}
}

View File

@ -42,6 +42,7 @@ public S3GuardExistsRetryPolicy(Configuration conf) {
protected Map<Class<? extends Exception>, RetryPolicy> createExceptionMap() {
Map<Class<? extends Exception>, RetryPolicy> b = super.createExceptionMap();
b.put(FileNotFoundException.class, retryIdempotentCalls);
b.put(RemoteFileChangedException.class, retryIdempotentCalls);
return b;
}
}

View File

@ -34,16 +34,22 @@ public class S3ObjectAttributes {
private final String key;
private final S3AEncryptionMethods serverSideEncryptionAlgorithm;
private final String serverSideEncryptionKey;
private final String eTag;
private final String versionId;
public S3ObjectAttributes(
String bucket,
String key,
S3AEncryptionMethods serverSideEncryptionAlgorithm,
String serverSideEncryptionKey) {
String serverSideEncryptionKey,
String eTag,
String versionId) {
this.bucket = bucket;
this.key = key;
this.serverSideEncryptionAlgorithm = serverSideEncryptionAlgorithm;
this.serverSideEncryptionKey = serverSideEncryptionKey;
this.eTag = eTag;
this.versionId = versionId;
}
public String getBucket() {
@ -61,4 +67,12 @@ public S3AEncryptionMethods getServerSideEncryptionAlgorithm() {
public String getServerSideEncryptionKey() {
return serverSideEncryptionKey;
}
public String getETag() {
return eTag;
}
public String getVersionId() {
return versionId;
}
}

View File

@ -226,7 +226,8 @@ public String initiateMultiPartUpload(String destKey) throws IOException {
/**
* Finalize a multipart PUT operation.
* This completes the upload, and, if that works, calls
* {@link S3AFileSystem#finishedWrite(String, long)} to update the filesystem.
* {@link S3AFileSystem#finishedWrite(String, long, String, String)}
* to update the filesystem.
* Retry policy: retrying, translated.
* @param destKey destination of the commit
* @param uploadId multipart operation Id
@ -247,7 +248,8 @@ private CompleteMultipartUploadResult finalizeMultipartUpload(
throw new IOException(
"No upload parts in multipart upload to " + destKey);
}
CompleteMultipartUploadResult uploadResult = invoker.retry("Completing multipart commit", destKey,
CompleteMultipartUploadResult uploadResult =
invoker.retry("Completing multipart commit", destKey,
true,
retrying,
() -> {
@ -260,7 +262,8 @@ private CompleteMultipartUploadResult finalizeMultipartUpload(
new ArrayList<>(partETags)));
}
);
owner.finishedWrite(destKey, length);
owner.finishedWrite(destKey, length, uploadResult.getETag(),
uploadResult.getVersionId());
return uploadResult;
}

View File

@ -20,8 +20,11 @@
import java.util.Locale;
import com.amazonaws.services.s3.model.CopyObjectRequest;
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.transfer.model.CopyResult;
import com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -30,6 +33,7 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.S3ObjectAttributes;
import org.apache.hadoop.fs.s3a.RemoteFileChangedException;
import static org.apache.hadoop.fs.s3a.Constants.*;
@ -47,7 +51,7 @@ public abstract class ChangeDetectionPolicy {
LoggerFactory.getLogger(ChangeDetectionPolicy.class);
@VisibleForTesting
public static final String CHANGE_DETECTED = "change detected";
public static final String CHANGE_DETECTED = "change detected on client";
private final Mode mode;
private final boolean requireVersion;
@ -200,6 +204,28 @@ public static ChangeDetectionPolicy createPolicy(final Mode mode,
public abstract String getRevisionId(ObjectMetadata objectMetadata,
String uri);
/**
* Like {{@link #getRevisionId(ObjectMetadata, String)}}, but retrieves the
* revision identifier from {@link S3ObjectAttributes}.
*
* @param s3Attributes the object attributes
* @return the revisionId string as interpreted by this policy, or potentially
* null if the attribute is unavailable (such as when the policy says to use
* versionId but object versioning is not enabled for the bucket).
*/
public abstract String getRevisionId(S3ObjectAttributes s3Attributes);
/**
* Like {{@link #getRevisionId(ObjectMetadata, String)}}, but retrieves the
* revision identifier from {@link CopyResult}.
*
* @param copyResult the copy result
* @return the revisionId string as interpreted by this policy, or potentially
* null if the attribute is unavailable (such as when the policy says to use
* versionId but object versioning is not enabled for the bucket).
*/
public abstract String getRevisionId(CopyResult copyResult);
/**
* Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId}
* as a server-side qualification on the {@code GetObjectRequest}.
@ -210,6 +236,26 @@ public abstract String getRevisionId(ObjectMetadata objectMetadata,
public abstract void applyRevisionConstraint(GetObjectRequest request,
String revisionId);
/**
* Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId}
* as a server-side qualification on the {@code CopyObjectRequest}.
*
* @param request the request
* @param revisionId the revision id
*/
public abstract void applyRevisionConstraint(CopyObjectRequest request,
String revisionId);
/**
* Applies the given {@link #getRevisionId(ObjectMetadata, String) revisionId}
* as a server-side qualification on the {@code GetObjectMetadataRequest}.
*
* @param request the request
* @param revisionId the revision id
*/
public abstract void applyRevisionConstraint(GetObjectMetadataRequest request,
String revisionId);
/**
* Takes appropriate action based on {@link #getMode() mode} when a change has
* been detected.
@ -234,6 +280,7 @@ public ImmutablePair<Boolean, RemoteFileChangedException> onChangeDetected(
long position,
String operation,
long timesAlreadyDetected) {
String positionText = position >= 0 ? (" at " + position) : "";
switch (mode) {
case None:
// something changed; we don't care.
@ -242,8 +289,9 @@ public ImmutablePair<Boolean, RemoteFileChangedException> onChangeDetected(
if (timesAlreadyDetected == 0) {
// only warn on the first detection to avoid a noisy log
LOG.warn(
String.format("%s change detected on %s %s at %d. Expected %s got %s",
getSource(), operation, uri, position, revisionId,
String.format(
"%s change detected on %s %s%s. Expected %s got %s",
getSource(), operation, uri, positionText, revisionId,
newRevisionId));
return new ImmutablePair<>(true, null);
}
@ -251,15 +299,16 @@ public ImmutablePair<Boolean, RemoteFileChangedException> onChangeDetected(
case Client:
case Server:
default:
// mode == Client (or Server, but really won't be called for Server)
// mode == Client or Server; will trigger on version failures
// of getObjectMetadata even on server.
return new ImmutablePair<>(true,
new RemoteFileChangedException(uri,
operation,
String.format("%s "
+ CHANGE_DETECTED
+ " while reading at position %s."
+ " during %s%s."
+ " Expected %s got %s",
getSource(), position, revisionId, newRevisionId)));
getSource(), operation, positionText, revisionId, newRevisionId)));
}
}
@ -277,12 +326,39 @@ public String getRevisionId(ObjectMetadata objectMetadata, String uri) {
return objectMetadata.getETag();
}
@Override
public String getRevisionId(S3ObjectAttributes s3Attributes) {
return s3Attributes.getETag();
}
@Override
public String getRevisionId(CopyResult copyResult) {
return copyResult.getETag();
}
@Override
public void applyRevisionConstraint(GetObjectRequest request,
String revisionId) {
LOG.debug("Restricting request to etag {}", revisionId);
if (revisionId != null) {
LOG.debug("Restricting get request to etag {}", revisionId);
request.withMatchingETagConstraint(revisionId);
}
}
@Override
public void applyRevisionConstraint(CopyObjectRequest request,
String revisionId) {
if (revisionId != null) {
LOG.debug("Restricting copy request to etag {}", revisionId);
request.withMatchingETagConstraint(revisionId);
}
}
@Override
public void applyRevisionConstraint(GetObjectMetadataRequest request,
String revisionId) {
// GetObjectMetadataRequest doesn't support eTag qualification
}
@Override
public Source getSource() {
@ -323,12 +399,42 @@ public String getRevisionId(ObjectMetadata objectMetadata, String uri) {
return versionId;
}
@Override
public String getRevisionId(S3ObjectAttributes s3Attributes) {
return s3Attributes.getVersionId();
}
@Override
public String getRevisionId(CopyResult copyResult) {
return copyResult.getVersionId();
}
@Override
public void applyRevisionConstraint(GetObjectRequest request,
String revisionId) {
LOG.debug("Restricting request to version {}", revisionId);
if (revisionId != null) {
LOG.debug("Restricting get request to version {}", revisionId);
request.withVersionId(revisionId);
}
}
@Override
public void applyRevisionConstraint(CopyObjectRequest request,
String revisionId) {
if (revisionId != null) {
LOG.debug("Restricting copy request to version {}", revisionId);
request.withSourceVersionId(revisionId);
}
}
@Override
public void applyRevisionConstraint(GetObjectMetadataRequest request,
String revisionId) {
if (revisionId != null) {
LOG.debug("Restricting metadata request to version {}", revisionId);
request.withVersionId(revisionId);
}
}
@Override
public Source getSource() {
@ -361,12 +467,34 @@ public String getRevisionId(final ObjectMetadata objectMetadata,
return null;
}
@Override
public String getRevisionId(final S3ObjectAttributes s3ObjectAttributes) {
return null;
}
@Override
public String getRevisionId(CopyResult copyResult) {
return null;
}
@Override
public void applyRevisionConstraint(final GetObjectRequest request,
final String revisionId) {
}
@Override
public void applyRevisionConstraint(CopyObjectRequest request,
String revisionId) {
}
@Override
public void applyRevisionConstraint(GetObjectMetadataRequest request,
String revisionId) {
}
@Override
public String toString() {
return "NoChangeDetection";

View File

@ -20,11 +20,15 @@
import java.util.concurrent.atomic.AtomicLong;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.SdkBaseException;
import com.amazonaws.services.s3.model.CopyObjectRequest;
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.transfer.model.CopyResult;
import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.fs.s3a.NoVersionAttributeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -32,14 +36,18 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.s3a.NoVersionAttributeException;
import org.apache.hadoop.fs.s3a.RemoteFileChangedException;
import org.apache.hadoop.fs.s3a.S3ObjectAttributes;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.apache.http.HttpStatus.SC_PRECONDITION_FAILED;
/**
* Change tracking for input streams: the revision ID/etag
* the previous request is recorded and when the next request comes in,
* it is compared.
* Change tracking for input streams: the version ID or etag of the object is
* tracked and compared on open/re-open. An initial version ID or etag may or
* may not be available, depending on usage (e.g. if S3Guard is utilized).
*
* Self-contained for testing and use in different streams.
*/
@InterfaceAudience.Private
@ -49,7 +57,7 @@ public class ChangeTracker {
private static final Logger LOG =
LoggerFactory.getLogger(ChangeTracker.class);
public static final String CHANGE_REPORTED_BY_S3 = "reported by S3";
public static final String CHANGE_REPORTED_BY_S3 = "Change reported by S3";
/** Policy to use. */
private final ChangeDetectionPolicy policy;
@ -76,13 +84,20 @@ public class ChangeTracker {
* @param uri URI of object being tracked
* @param policy policy to track.
* @param versionMismatches reference to the version mismatch counter
* @param s3ObjectAttributes attributes of the object, potentially including
* an eTag or versionId to match depending on {@code policy}
*/
public ChangeTracker(final String uri,
final ChangeDetectionPolicy policy,
final AtomicLong versionMismatches) {
final AtomicLong versionMismatches,
final S3ObjectAttributes s3ObjectAttributes) {
this.policy = checkNotNull(policy);
this.uri = uri;
this.versionMismatches = versionMismatches;
this.revisionId = policy.getRevisionId(s3ObjectAttributes);
if (revisionId != null) {
LOG.debug("Revision ID for object at {}: {}", uri, revisionId);
}
}
public String getRevisionId() {
@ -115,6 +130,33 @@ public boolean maybeApplyConstraint(
return false;
}
/**
* Apply any revision control set by the policy if it is to be
* enforced on the server.
* @param request request to modify
* @return true iff a constraint was added.
*/
public boolean maybeApplyConstraint(
final CopyObjectRequest request) {
if (policy.getMode() == ChangeDetectionPolicy.Mode.Server
&& revisionId != null) {
policy.applyRevisionConstraint(request, revisionId);
return true;
}
return false;
}
public boolean maybeApplyConstraint(
final GetObjectMetadataRequest request) {
if (policy.getMode() == ChangeDetectionPolicy.Mode.Server
&& revisionId != null) {
policy.applyRevisionConstraint(request, revisionId);
return true;
}
return false;
}
/**
* Process the response from the server for validation against the
@ -135,29 +177,106 @@ public void processResponse(final S3Object object,
// object was not returned.
versionMismatches.incrementAndGet();
throw new RemoteFileChangedException(uri, operation,
String.format("%s change "
+ CHANGE_REPORTED_BY_S3
+ " while reading"
String.format(CHANGE_REPORTED_BY_S3
+ " during %s"
+ " at position %s."
+ " Version %s was unavailable",
getSource(),
+ " %s %s was unavailable",
operation,
pos,
getSource(),
getRevisionId()));
} else {
throw new PathIOException(uri, "No data returned from GET request");
}
}
final ObjectMetadata metadata = object.getObjectMetadata();
processMetadata(object.getObjectMetadata(), operation);
}
/**
* Process the response from the server for validation against the
* change policy.
* @param copyResult result of a copy operation
* @throws PathIOException raised on failure
* @throws RemoteFileChangedException if the remote file has changed.
*/
public void processResponse(final CopyResult copyResult)
throws PathIOException {
// ETag (sometimes, depending on encryption and/or multipart) is not the
// same on the copied object as the original. Version Id seems to never
// be the same on the copy. As such, there isn't really anything that
// can be verified on the response, except that a revision ID is present
// if required.
String newRevisionId = policy.getRevisionId(copyResult);
LOG.debug("Copy result {}: {}", policy.getSource(), newRevisionId);
if (newRevisionId == null && policy.isRequireVersion()) {
throw new NoVersionAttributeException(uri, String.format(
"Change detection policy requires %s",
policy.getSource()));
}
}
/**
* Process an exception generated against the change policy.
* If the exception indicates the file has changed, this method throws
* {@code RemoteFileChangedException} with the original exception as the
* cause.
* @param e the exception
* @param operation the operation performed when the exception was
* generated (e.g. "copy", "read", "select").
* @throws RemoteFileChangedException if the remote file has changed.
*/
public void processException(SdkBaseException e, String operation) throws
RemoteFileChangedException {
if (e instanceof AmazonServiceException) {
AmazonServiceException serviceException = (AmazonServiceException) e;
// This isn't really going to be hit due to
// https://github.com/aws/aws-sdk-java/issues/1644
if (serviceException.getStatusCode() == SC_PRECONDITION_FAILED) {
versionMismatches.incrementAndGet();
throw new RemoteFileChangedException(uri, operation, String.format(
RemoteFileChangedException.PRECONDITIONS_FAILED
+ " on %s."
+ " Version %s was unavailable",
getSource(),
getRevisionId()),
serviceException);
}
}
}
/**
* Process metadata response from server for validation against the change
* policy.
* @param metadata metadata returned from server
* @param operation operation in progress
* @throws PathIOException raised on failure
* @throws RemoteFileChangedException if the remote file has changed.
*/
public void processMetadata(final ObjectMetadata metadata,
final String operation) throws PathIOException {
final String newRevisionId = policy.getRevisionId(metadata, uri);
processNewRevision(newRevisionId, operation, -1);
}
/**
* Validate a revision from the server against our expectations.
* @param newRevisionId new revision.
* @param operation operation in progress
* @param pos offset in the file; -1 for "none"
* @throws PathIOException raised on failure
* @throws RemoteFileChangedException if the remote file has changed.
*/
private void processNewRevision(final String newRevisionId,
final String operation, final long pos) throws PathIOException {
if (newRevisionId == null && policy.isRequireVersion()) {
throw new NoVersionAttributeException(uri, String.format(
"Change detection policy requires %s",
policy.getSource()));
}
if (revisionId == null) {
// revisionId is null on first (re)open. Pin it so change can be detected
// if object has been updated
// revisionId may be null on first (re)open. Pin it so change can be
// detected if object has been updated
LOG.debug("Setting revision ID for object at {}: {}",
uri, newRevisionId);
revisionId = newRevisionId;

View File

@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.impl;
import com.amazonaws.SdkBaseException;
import com.amazonaws.services.s3.transfer.Copy;
import com.amazonaws.services.s3.transfer.model.CopyResult;
/**
* Extracts the outcome of a TransferManager-executed copy operation.
*/
public final class CopyOutcome {
/**
* Result of a successful copy.
*/
private final CopyResult copyResult;
/** the copy was interrupted. */
private final InterruptedException interruptedException;
/**
* The copy raised an AWS Exception of some form.
*/
private final SdkBaseException awsException;
public CopyOutcome(CopyResult copyResult,
InterruptedException interruptedException,
SdkBaseException awsException) {
this.copyResult = copyResult;
this.interruptedException = interruptedException;
this.awsException = awsException;
}
public CopyResult getCopyResult() {
return copyResult;
}
public InterruptedException getInterruptedException() {
return interruptedException;
}
public SdkBaseException getAwsException() {
return awsException;
}
/**
* Calls {@code Copy.waitForCopyResult()} to await the result, converts
* it to a copy outcome.
* Exceptions caught and
* @param copy the copy operation.
* @return the outcome.
*/
public static CopyOutcome waitForCopy(Copy copy) {
try {
CopyResult result = copy.waitForCopyResult();
return new CopyOutcome(result, null, null);
} catch (SdkBaseException e) {
return new CopyOutcome(null, null, e);
} catch (InterruptedException e) {
return new CopyOutcome(null, e, null);
}
}
}

View File

@ -18,7 +18,7 @@
package org.apache.hadoop.fs.s3a.s3guard;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.Tristate;
/**
@ -36,18 +36,18 @@ public DDBPathMetadata(PathMetadata pmd) {
this.setLastUpdated(pmd.getLastUpdated());
}
public DDBPathMetadata(FileStatus fileStatus) {
public DDBPathMetadata(S3AFileStatus fileStatus) {
super(fileStatus);
this.isAuthoritativeDir = false;
}
public DDBPathMetadata(FileStatus fileStatus, Tristate isEmptyDir,
public DDBPathMetadata(S3AFileStatus fileStatus, Tristate isEmptyDir,
boolean isDeleted) {
super(fileStatus, isEmptyDir, isDeleted);
this.isAuthoritativeDir = false;
}
public DDBPathMetadata(FileStatus fileStatus, Tristate isEmptyDir,
public DDBPathMetadata(S3AFileStatus fileStatus, Tristate isEmptyDir,
boolean isDeleted, boolean isAuthoritativeDir, long lastUpdated) {
super(fileStatus, isEmptyDir, isDeleted);
this.isAuthoritativeDir = isAuthoritativeDir;

View File

@ -28,9 +28,9 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
/**
* {@code DescendantsIterator} is a {@link RemoteIterator} that implements
@ -83,7 +83,7 @@
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class DescendantsIterator implements RemoteIterator<FileStatus> {
public class DescendantsIterator implements RemoteIterator<S3AFileStatus> {
private final MetadataStore metadataStore;
private final Queue<PathMetadata> queue = new LinkedList<>();
@ -121,7 +121,7 @@ public boolean hasNext() throws IOException {
}
@Override
public FileStatus next() throws IOException {
public S3AFileStatus next() throws IOException {
if (!hasNext()) {
throw new NoSuchElementException("No more descendants.");
}

View File

@ -34,6 +34,7 @@
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.Tristate;
/**
@ -61,7 +62,7 @@ public class DirListingMetadata extends ExpirableMetadata {
* Create a directory listing metadata container.
*
* @param path Path of the directory. If this path has a host component, then
* all paths added later via {@link #put(FileStatus)} must also have
* all paths added later via {@link #put(S3AFileStatus)} must also have
* the same host.
* @param listing Entries in the directory.
* @param isAuthoritative true iff listing is the full contents of the
@ -225,7 +226,7 @@ public void remove(Path childPath) {
* @return true if the status was added or replaced with a new value. False
* if the same FileStatus value was already present.
*/
public boolean put(FileStatus childFileStatus) {
public boolean put(S3AFileStatus childFileStatus) {
Preconditions.checkNotNull(childFileStatus,
"childFileStatus must be non-null");
Path childPath = childStatusToPathKey(childFileStatus);

View File

@ -88,6 +88,7 @@
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.Invoker;
import org.apache.hadoop.fs.s3a.Retries;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3AInstrumentation;
import org.apache.hadoop.fs.s3a.S3AUtils;
@ -129,6 +130,14 @@
* This attribute is meaningful only to file items.</li>
* <li> optional long attribute revealing block size of the file.
* This attribute is meaningful only to file items.</li>
* <li> optional string attribute tracking the s3 eTag of the file.
* May be absent if the metadata was entered with a version of S3Guard
* before this was tracked.
* This attribute is meaningful only to file items.</li>
* <li> optional string attribute tracking the s3 versionId of the file.
* May be absent if the metadata was entered with a version of S3Guard
* before this was tracked.
* This attribute is meaningful only to file items.</li>
* </ul>
*
* The DynamoDB partition key is the parent, and the range key is the child.
@ -155,20 +164,20 @@
* This is persisted to a single DynamoDB table as:
*
* <pre>
* =========================================================================
* | parent | child | is_dir | mod_time | len | ... |
* =========================================================================
* | /bucket | dir1 | true | | | |
* | /bucket/dir1 | dir2 | true | | | |
* | /bucket/dir1 | dir3 | true | | | |
* | /bucket/dir1/dir2 | file1 | | 100 | 111 | |
* | /bucket/dir1/dir2 | file2 | | 200 | 222 | |
* | /bucket/dir1/dir3 | dir4 | true | | | |
* | /bucket/dir1/dir3 | dir5 | true | | | |
* | /bucket/dir1/dir3/dir4 | file3 | | 300 | 333 | |
* | /bucket/dir1/dir3/dir5 | file4 | | 400 | 444 | |
* | /bucket/dir1/dir3 | dir6 | true | | | |
* =========================================================================
* ====================================================================================
* | parent | child | is_dir | mod_time | len | etag | ver_id | ... |
* ====================================================================================
* | /bucket | dir1 | true | | | | | |
* | /bucket/dir1 | dir2 | true | | | | | |
* | /bucket/dir1 | dir3 | true | | | | | |
* | /bucket/dir1/dir2 | file1 | | 100 | 111 | abc | mno | |
* | /bucket/dir1/dir2 | file2 | | 200 | 222 | def | pqr | |
* | /bucket/dir1/dir3 | dir4 | true | | | | | |
* | /bucket/dir1/dir3 | dir5 | true | | | | | |
* | /bucket/dir1/dir3/dir4 | file3 | | 300 | 333 | ghi | stu | |
* | /bucket/dir1/dir3/dir5 | file4 | | 400 | 444 | jkl | vwx | |
* | /bucket/dir1/dir3 | dir6 | true | | | | | |
* ====================================================================================
* </pre>
*
* This choice of schema is efficient for read access patterns.
@ -618,16 +627,15 @@ private DDBPathMetadata innerGet(Path path, boolean wantEmptyDirectoryFlag)
}
/**
* Make a FileStatus object for a directory at given path. The FileStatus
* only contains what S3A needs, and omits mod time since S3A uses its own
* implementation which returns current system time.
* @param owner username of owner
* Make a S3AFileStatus object for a directory at given path.
* The FileStatus only contains what S3A needs, and omits mod time
* since S3A uses its own implementation which returns current system time.
* @param dirOwner username of owner
* @param path path to dir
* @return new FileStatus
* @return new S3AFileStatus
*/
private FileStatus makeDirStatus(String owner, Path path) {
return new FileStatus(0, true, 1, 0, 0, 0, null,
owner, null, path);
private S3AFileStatus makeDirStatus(String dirOwner, Path path) {
return new S3AFileStatus(Tristate.UNKNOWN, path, dirOwner);
}
@Override
@ -710,7 +718,7 @@ Collection<DDBPathMetadata> completeAncestry(
while (!parent.isRoot() && !ancestry.containsKey(parent)) {
LOG.debug("auto-create ancestor path {} for child path {}",
parent, path);
final FileStatus status = makeDirStatus(parent, username);
final S3AFileStatus status = makeDirStatus(parent, username);
ancestry.put(parent, new DDBPathMetadata(status, Tristate.FALSE,
false));
parent = parent.getParent();
@ -915,7 +923,7 @@ Collection<DDBPathMetadata> fullPathsToPut(DDBPathMetadata meta)
while (path != null && !path.isRoot()) {
final Item item = getConsistentItem(path);
if (!itemExists(item)) {
final FileStatus status = makeDirStatus(path, username);
final S3AFileStatus status = makeDirStatus(path, username);
metasToPut.add(new DDBPathMetadata(status, Tristate.FALSE, false,
meta.isAuthoritativeDir(), meta.getLastUpdated()));
path = path.getParent();
@ -938,9 +946,8 @@ private boolean itemExists(Item item) {
}
/** Create a directory FileStatus using current system time as mod time. */
static FileStatus makeDirStatus(Path f, String owner) {
return new FileStatus(0, true, 1, 0, System.currentTimeMillis(), 0,
null, owner, owner, f);
static S3AFileStatus makeDirStatus(Path f, String owner) {
return new S3AFileStatus(Tristate.UNKNOWN, f, owner);
}
/**

View File

@ -28,6 +28,7 @@
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.Tristate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -231,7 +232,7 @@ public void move(Collection<Path> pathsToDelete,
public void put(PathMetadata meta) throws IOException {
Preconditions.checkNotNull(meta);
FileStatus status = meta.getFileStatus();
S3AFileStatus status = meta.getFileStatus();
Path path = standardize(status.getPath());
synchronized (this) {

View File

@ -33,9 +33,9 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
/**
* {@code MetadataStoreListFilesIterator} is a {@link RemoteIterator} that
@ -85,14 +85,14 @@
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class MetadataStoreListFilesIterator implements
RemoteIterator<FileStatus> {
RemoteIterator<S3AFileStatus> {
public static final Logger LOG = LoggerFactory.getLogger(
MetadataStoreListFilesIterator.class);
private final boolean allowAuthoritative;
private final MetadataStore metadataStore;
private final Set<Path> tombstones = new HashSet<>();
private Iterator<FileStatus> leafNodesIterator = null;
private Iterator<S3AFileStatus> leafNodesIterator = null;
public MetadataStoreListFilesIterator(MetadataStore ms, PathMetadata meta,
boolean allowAuthoritative) throws IOException {
@ -104,7 +104,7 @@ public MetadataStoreListFilesIterator(MetadataStore ms, PathMetadata meta,
private void prefetch(PathMetadata meta) throws IOException {
final Queue<PathMetadata> queue = new LinkedList<>();
final Collection<FileStatus> leafNodes = new ArrayList<>();
final Collection<S3AFileStatus> leafNodes = new ArrayList<>();
if (meta != null) {
final Path path = meta.getFileStatus().getPath();
@ -121,7 +121,7 @@ private void prefetch(PathMetadata meta) throws IOException {
while(!queue.isEmpty()) {
PathMetadata nextMetadata = queue.poll();
FileStatus nextStatus = nextMetadata.getFileStatus();
S3AFileStatus nextStatus = nextMetadata.getFileStatus();
if (nextStatus.isFile()) {
// All files are leaf nodes by definition
leafNodes.add(nextStatus);
@ -159,7 +159,7 @@ public boolean hasNext() {
}
@Override
public FileStatus next() {
public S3AFileStatus next() {
return leafNodesIterator.next();
}

View File

@ -21,8 +21,8 @@
import com.google.common.base.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.Tristate;
/**
@ -33,7 +33,7 @@
@InterfaceStability.Evolving
public class PathMetadata extends ExpirableMetadata {
private final FileStatus fileStatus;
private S3AFileStatus fileStatus;
private Tristate isEmptyDirectory;
private boolean isDeleted;
@ -43,24 +43,25 @@ public class PathMetadata extends ExpirableMetadata {
* @return the entry.
*/
public static PathMetadata tombstone(Path path) {
long now = System.currentTimeMillis();
FileStatus status = new FileStatus(0, false, 0, 0, now, path);
return new PathMetadata(status, Tristate.UNKNOWN, true);
S3AFileStatus s3aStatus = new S3AFileStatus(0,
System.currentTimeMillis(), path, 0, null,
null, null);
return new PathMetadata(s3aStatus, Tristate.UNKNOWN, true);
}
/**
* Creates a new {@code PathMetadata} containing given {@code FileStatus}.
* @param fileStatus file status containing an absolute path.
*/
public PathMetadata(FileStatus fileStatus) {
this(fileStatus, Tristate.UNKNOWN);
public PathMetadata(S3AFileStatus fileStatus) {
this(fileStatus, Tristate.UNKNOWN, false);
}
public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir) {
public PathMetadata(S3AFileStatus fileStatus, Tristate isEmptyDir) {
this(fileStatus, isEmptyDir, false);
}
public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir, boolean
public PathMetadata(S3AFileStatus fileStatus, Tristate isEmptyDir, boolean
isDeleted) {
Preconditions.checkNotNull(fileStatus, "fileStatus must be non-null");
Preconditions.checkNotNull(fileStatus.getPath(), "fileStatus path must be" +
@ -75,7 +76,7 @@ public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir, boolean
/**
* @return {@code FileStatus} contained in this {@code PathMetadata}.
*/
public final FileStatus getFileStatus() {
public final S3AFileStatus getFileStatus() {
return fileStatus;
}
@ -91,6 +92,7 @@ public Tristate isEmptyDirectory() {
void setIsEmptyDirectory(Tristate isEmptyDirectory) {
this.isEmptyDirectory = isEmptyDirectory;
fileStatus.setIsEmptyDirectory(isEmptyDirectory);
}
public boolean isDeleted() {
@ -128,10 +130,11 @@ public String toString() {
* @param sb target StringBuilder
*/
public void prettyPrint(StringBuilder sb) {
sb.append(String.format("%-5s %-20s %-7d %-8s %-6s",
sb.append(String.format("%-5s %-20s %-7d %-8s %-6s %-20s %-20s",
fileStatus.isDirectory() ? "dir" : "file",
fileStatus.getPath().toString(), fileStatus.getLen(),
isEmptyDirectory.name(), isDeleted));
isEmptyDirectory.name(), isDeleted,
fileStatus.getETag(), fileStatus.getVersionId()));
sb.append(fileStatus);
}

View File

@ -40,9 +40,9 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.Tristate;
/**
@ -70,6 +70,8 @@ final class PathMetadataDynamoDBTranslation {
static final String IS_DELETED = "is_deleted";
static final String IS_AUTHORITATIVE = "is_authoritative";
static final String LAST_UPDATED = "last_updated";
static final String ETAG = "etag";
static final String VERSION_ID = "version_id";
/** Used while testing backward compatibility. */
@VisibleForTesting
@ -135,7 +137,7 @@ static DDBPathMetadata itemToPathMetadata(Item item, String username) {
boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
boolean isAuthoritativeDir = false;
final FileStatus fileStatus;
final S3AFileStatus fileStatus;
long lastUpdated = 0;
if (isDir) {
isAuthoritativeDir = !IGNORED_FIELDS.contains(IS_AUTHORITATIVE)
@ -146,8 +148,10 @@ static DDBPathMetadata itemToPathMetadata(Item item, String username) {
long len = item.hasAttribute(FILE_LENGTH) ? item.getLong(FILE_LENGTH) : 0;
long modTime = item.hasAttribute(MOD_TIME) ? item.getLong(MOD_TIME) : 0;
long block = item.hasAttribute(BLOCK_SIZE) ? item.getLong(BLOCK_SIZE) : 0;
fileStatus = new FileStatus(len, false, 1, block, modTime, 0, null,
username, username, path);
String eTag = item.getString(ETAG);
String versionId = item.getString(VERSION_ID);
fileStatus = new S3AFileStatus(
len, modTime, path, block, username, eTag, versionId);
}
lastUpdated =
!IGNORED_FIELDS.contains(LAST_UPDATED)
@ -172,7 +176,7 @@ static DDBPathMetadata itemToPathMetadata(Item item, String username) {
*/
static Item pathMetadataToItem(DDBPathMetadata meta) {
Preconditions.checkNotNull(meta);
final FileStatus status = meta.getFileStatus();
final S3AFileStatus status = meta.getFileStatus();
final Item item = new Item().withPrimaryKey(pathToKey(status.getPath()));
if (status.isDirectory()) {
item.withBoolean(IS_DIR, true);
@ -183,6 +187,12 @@ static Item pathMetadataToItem(DDBPathMetadata meta) {
item.withLong(FILE_LENGTH, status.getLen())
.withLong(MOD_TIME, status.getModificationTime())
.withLong(BLOCK_SIZE, status.getBlockSize());
if (status.getETag() != null) {
item.withString(ETAG, status.getETag());
}
if (status.getVersionId() != null) {
item.withString(VERSION_ID, status.getVersionId());
}
}
item.withBoolean(IS_DELETED, meta.isDeleted());

View File

@ -68,7 +68,7 @@ public final class S3Guard {
static final Class<? extends DynamoDBClientFactory>
S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT =
DynamoDBClientFactory.DefaultDynamoDBClientFactory.class;
private static final FileStatus[] EMPTY_LISTING = new FileStatus[0];
private static final S3AFileStatus[] EMPTY_LISTING = new S3AFileStatus[0];
// Utility class. All static functions.
private S3Guard() { }
@ -164,7 +164,7 @@ public static S3AFileStatus putAndReturn(MetadataStore ms,
* @param dirMeta directory listing -may be null
* @return a possibly-empty array of file status entries
*/
public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) {
public static S3AFileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) {
if (dirMeta == null) {
return EMPTY_LISTING;
}
@ -178,7 +178,7 @@ public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) {
}
}
return statuses.toArray(new FileStatus[0]);
return statuses.toArray(new S3AFileStatus[0]);
}
/**
@ -201,7 +201,7 @@ public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) {
* @throws IOException if metadata store update failed
*/
public static FileStatus[] dirListingUnion(MetadataStore ms, Path path,
List<FileStatus> backingStatuses, DirListingMetadata dirMeta,
List<S3AFileStatus> backingStatuses, DirListingMetadata dirMeta,
boolean isAuthoritative, ITtlTimeProvider timeProvider)
throws IOException {
@ -232,7 +232,7 @@ public static FileStatus[] dirListingUnion(MetadataStore ms, Path path,
pm -> pm.getFileStatus().getPath(), PathMetadata::getFileStatus)
);
for (FileStatus s : backingStatuses) {
for (S3AFileStatus s : backingStatuses) {
if (deleted.contains(s.getPath())) {
continue;
}
@ -323,7 +323,7 @@ public static void makeDirsOrdered(MetadataStore ms, List<Path> dirs,
* [/a/b/file0, /a/b/file1, /a/b/file2, /a/b/file3], isAuthoritative =
* true
*/
FileStatus prevStatus = null;
S3AFileStatus prevStatus = null;
// Use new batched put to reduce round trips.
List<PathMetadata> pathMetas = new ArrayList<>(dirs.size());
@ -334,8 +334,8 @@ public static void makeDirsOrdered(MetadataStore ms, List<Path> dirs,
boolean isLeaf = (prevStatus == null);
Path f = dirs.get(i);
assertQualified(f);
FileStatus status =
createUploadFileStatus(f, true, 0, 0, owner);
S3AFileStatus status =
createUploadFileStatus(f, true, 0, 0, owner, null, null);
// We only need to put a DirListingMetadata if we are setting
// authoritative bit
@ -383,7 +383,8 @@ public static void addMoveDir(MetadataStore ms, Collection<Path> srcPaths,
}
assertQualified(srcPath, dstPath);
FileStatus dstStatus = createUploadFileStatus(dstPath, true, 0, 0, owner);
S3AFileStatus dstStatus = createUploadFileStatus(dstPath, true, 0,
0, owner, null, null);
addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus);
}
@ -399,16 +400,18 @@ public static void addMoveDir(MetadataStore ms, Collection<Path> srcPaths,
* @param size length of file moved
* @param blockSize blocksize to associate with destination file
* @param owner file owner to use in created records
* @param eTag the s3 object eTag of file moved
* @param versionId the s3 object versionId of file moved
*/
public static void addMoveFile(MetadataStore ms, Collection<Path> srcPaths,
Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath,
long size, long blockSize, String owner) {
long size, long blockSize, String owner, String eTag, String versionId) {
if (isNullMetadataStore(ms)) {
return;
}
assertQualified(srcPath, dstPath);
FileStatus dstStatus = createUploadFileStatus(dstPath, false,
size, blockSize, owner);
S3AFileStatus dstStatus = createUploadFileStatus(dstPath, false,
size, blockSize, owner, eTag, versionId);
addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus);
}
@ -465,9 +468,8 @@ public static void addAncestors(MetadataStore metadataStore,
while (!parent.isRoot()) {
PathMetadata directory = metadataStore.get(parent);
if (directory == null || directory.isDeleted()) {
FileStatus status = new FileStatus(0, true, 1, 0, 0, 0, null, username,
null, parent);
PathMetadata meta = new PathMetadata(status, Tristate.FALSE, false);
S3AFileStatus s3aStatus = new S3AFileStatus(Tristate.FALSE, parent, username);
PathMetadata meta = new PathMetadata(s3aStatus, Tristate.FALSE, false);
newDirs.add(meta);
} else {
break;
@ -480,7 +482,7 @@ public static void addAncestors(MetadataStore metadataStore,
private static void addMoveStatus(Collection<Path> srcPaths,
Collection<PathMetadata> dstMetas,
Path srcPath,
FileStatus dstStatus) {
S3AFileStatus dstStatus) {
srcPaths.add(srcPath);
dstMetas.add(new PathMetadata(dstStatus));
}

View File

@ -44,12 +44,12 @@
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.s3a.MultipartUtils;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus;
import org.apache.hadoop.fs.s3a.S3AUtils;
import org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens;
import org.apache.hadoop.fs.s3a.commit.CommitConstants;
@ -703,7 +703,7 @@ private void putParentsIfNotPresent(FileStatus f) throws IOException {
if (dirCache.contains(parent)) {
return;
}
FileStatus dir = DynamoDBMetadataStore.makeDirStatus(parent,
S3AFileStatus dir = DynamoDBMetadataStore.makeDirStatus(parent,
f.getOwner());
getStore().put(new PathMetadata(dir));
dirCache.add(parent);
@ -718,13 +718,13 @@ private void putParentsIfNotPresent(FileStatus f) throws IOException {
*/
private long importDir(FileStatus status) throws IOException {
Preconditions.checkArgument(status.isDirectory());
RemoteIterator<LocatedFileStatus> it = getFilesystem()
RemoteIterator<S3ALocatedFileStatus> it = getFilesystem()
.listFilesAndEmptyDirectories(status.getPath(), true);
long items = 0;
while (it.hasNext()) {
LocatedFileStatus located = it.next();
FileStatus child;
S3ALocatedFileStatus located = it.next();
S3AFileStatus child;
if (located.isDirectory()) {
child = DynamoDBMetadataStore.makeDirStatus(located.getPath(),
located.getOwner());
@ -734,7 +734,9 @@ private long importDir(FileStatus status) throws IOException {
located.getModificationTime(),
located.getPath(),
located.getBlockSize(),
located.getOwner());
located.getOwner(),
located.getETag(),
located.getVersionId());
}
putParentsIfNotPresent(child);
getStore().put(new PathMetadata(child));
@ -761,7 +763,8 @@ public int run(String[] args, PrintStream out) throws Exception {
filePath = "/";
}
Path path = new Path(filePath);
FileStatus status = getFilesystem().getFileStatus(path);
S3AFileStatus status = (S3AFileStatus) getFilesystem()
.getFileStatus(path);
try {
initMetadataStore(false);
@ -1163,7 +1166,7 @@ public int run(String[] args, PrintStream out)
magic ? "is" : "is not");
println(out, "%nS3A Client");
printOption(out, "\tSigning Algorithm", SIGNING_ALGORITHM, "(unset)");
String endpoint = conf.getTrimmed(ENDPOINT, "");
println(out, "\tEndpoint: %s=%s",
ENDPOINT,
@ -1172,6 +1175,10 @@ public int run(String[] args, PrintStream out)
printOption(out, "\tEncryption", SERVER_SIDE_ENCRYPTION_ALGORITHM,
"none");
printOption(out, "\tInput seek policy", INPUT_FADVISE, INPUT_FADV_NORMAL);
printOption(out, "\tChange Detection Source", CHANGE_DETECT_SOURCE,
CHANGE_DETECT_SOURCE_DEFAULT);
printOption(out, "\tChange Detection Mode", CHANGE_DETECT_MODE,
CHANGE_DETECT_MODE_DEFAULT);
// look at delegation token support
if (fs.getDelegationTokens().isPresent()) {

View File

@ -690,10 +690,15 @@ Filesystem s3a://landsat-pds is not using S3Guard
The "magic" committer is not supported
S3A Client
Endpoint: fs.s3a.endpoint=(unset)
Signing Algorithm: fs.s3a.signing-algorithm=(unset)
Endpoint: fs.s3a.endpoint=s3.amazonaws.com
Encryption: fs.s3a.server-side-encryption-algorithm=none
Input seek policy: fs.s3a.experimental.input.fadvise=normal
2017-09-27 19:18:57,917 INFO util.ExitUtil: Exiting with status 46: 46: The magic committer is not enabled for s3a://landsat-pds
Change Detection Source: fs.s3a.change.detection.source=etag
Change Detection Mode: fs.s3a.change.detection.mode=server
Delegation token support is disabled
2019-05-17 13:53:38,245 [main] INFO util.ExitUtil (ExitUtil.java:terminate(210)) -
Exiting with status 46: 46: The magic committer is not enabled for s3a://landsat-pds
```
## Error message: "File being created has a magic path, but the filesystem has magic file support disabled:

View File

@ -522,12 +522,15 @@ $ hadoop s3guard bucket-info s3a://landsat-pds/
Filesystem s3a://landsat-pds
Location: us-west-2
Filesystem s3a://landsat-pds is not using S3Guard
The "magic" committer is supported
The "magic" committer is not supported
S3A Client
Signing Algorithm: fs.s3a.signing-algorithm=(unset)
Endpoint: fs.s3a.endpoint=s3.amazonaws.com
Encryption: fs.s3a.server-side-encryption-algorithm=none
Input seek policy: fs.s3a.experimental.input.fadvise=normal
Change Detection Source: fs.s3a.change.detection.source=etag
Change Detection Mode: fs.s3a.change.detection.mode=server
Delegation Support enabled: token kind = S3ADelegationToken/Session
Hadoop security mode: SIMPLE
```

View File

@ -644,9 +644,13 @@ Metadata Store Diagnostics:
The "magic" committer is supported
S3A Client
Signing Algorithm: fs.s3a.signing-algorithm=(unset)
Endpoint: fs.s3a.endpoint=s3-eu-west-1.amazonaws.com
Encryption: fs.s3a.server-side-encryption-algorithm=none
Input seek policy: fs.s3a.experimental.input.fadvise=normal
Change Detection Source: fs.s3a.change.detection.source=etag
Change Detection Mode: fs.s3a.change.detection.mode=server
Delegation token support is disabled
```
This listing includes all the information about the table supplied from
@ -1097,6 +1101,111 @@ based on usage information collected from previous days, and choosing a
combination of retry counts and an interval which allow for the clients to cope with
some throttling, but not to time-out other applications.
## Read-After-Overwrite Consistency
S3Guard provides read-after-overwrite consistency through ETags (default) or
object versioning checked either on the server (default) or client. This works
such that a reader reading a file after an overwrite either sees the new version
of the file or an error. Without S3Guard, new readers may see the original
version. Once S3 reaches eventual consistency, new readers will see the new
version.
Readers using S3Guard will usually see the new file version, but may
in rare cases see `RemoteFileChangedException` instead. This would occur if
an S3 object read cannot provide the version tracked in S3Guard metadata.
S3Guard achieves this behavior by storing ETags and object version IDs in the
S3Guard metadata store (e.g. DynamoDB). On opening a file, S3AFileSystem
will look in S3 for the version of the file indicated by the ETag or object
version ID stored in the metadata store. If that version is unavailable,
`RemoteFileChangedException` is thrown. Whether ETag or version ID and
server or client mode is used is determed by the
[fs.s3a.change.detection configuration options](./index.html#Handling_Read-During-Overwrite).
### No Versioning Metadata Available
When the first S3AFileSystem clients are upgraded to a version of
`S3AFileSystem` that contains these change tracking features, any existing
S3Guard metadata will not contain ETags or object version IDs. Reads of files
tracked in such S3Guard metadata will access whatever version of the file is
available in S3 at the time of read. Only if the file is subsequently updated
will S3Guard start tracking ETag and object version ID and as such generating
`RemoteFileChangedException` if an inconsistency is detected.
Similarly, when S3Guard metadata is pruned, S3Guard will no longer be able to
detect an inconsistent read. S3Guard metadata should be retained for at least
as long as the perceived possible read-after-overwrite temporary inconsistency
window. That window is expected to be short, but there are no guarantees so it
is at the administrator's discretion to weigh the risk.
### Known Limitations
#### S3 Select
S3 Select does not provide a capability for server-side ETag or object
version ID qualification. Whether `fs.s3a.change.detection.mode` is "client" or
"server", S3Guard will cause a client-side check of the file version before
opening the file with S3 Select. If the current version does not match the
version tracked in S3Guard, `RemoteFileChangedException` is thrown.
It is still possible that the S3 Select read will access a different version of
the file, if the visible file version changes between the version check and
the opening of the file. This can happen due to eventual consistency or
an overwrite of the file between the version check and the open of the file.
#### Rename
Rename is implemented via copy in S3. With `fs.s3a.change.detection.mode` set
to "client", a fully reliable mechansim for ensuring the copied content is the expected
content is not possible. This is the case since there isn't necessarily a way
to know the expected ETag or version ID to appear on the object resulting from
the copy.
Furthermore, if `fs.s3a.change.detection.mode` is "server" and a third-party S3
implementation is used that doesn't honor the provided ETag or version ID,
S3AFileSystem and S3Guard cannot detect it.
When `fs.s3.change.detection.mode` is "client", a client-side check
will be performed before the copy to ensure the current version of the file
matches S3Guard metadata. If not, `RemoteFileChangedException` is thrown.
Similar to as discussed with regard to S3 Select, this is not sufficient to
guarantee that same version is the version copied.
When `fs.s3.change.detection.mode` server, the expected version is also specified
in the underlying S3 `CopyObjectRequest`. As long as the server honors it, the
copied object will be correct.
All this said, with the defaults of `fs.s3.change.detection.mode` of "server" and
`fs.s3.change.detection.source` of "etag", when working with Amazon's S3, copy should in fact
either copy the expected file version or, in the case of an eventual consistency
anomaly, generate `RemoteFileChangedException`. The same should be true when
`fs.s3.change.detection.source` = "versionid".
#### Out of Sync Metadata
The S3Guard version tracking metadata (ETag or object version ID) could become
out of sync with the true current object metadata in S3. For example, S3Guard
is still tracking v1 of some file after v2 has been written. This could occur
for reasons such as a writer writing without utilizing S3Guard and/or
S3AFileSystem or simply due to a write with S3AFileSystem and S3Guard that wrote
successfully to S3, but failed in communication with S3Guard's metadata store
(e.g. DynamoDB).
If this happens, reads of the affected file(s) will result in
`RemoteFileChangedException` until one of:
* the S3Guard metadata is corrected out-of-band
* the file is overwritten (causing an S3Guard metadata update)
* the S3Guard metadata is pruned
The S3Guard metadata for a file can be corrected with the `s3guard import`
command as discussed above. The command can take a file URI instead of a
bucket URI to correct the metadata for a single file. For example:
```bash
hadoop s3guard import [-meta URI] s3a://my-bucket/file-with-bad-metadata
```
## Troubleshooting
### Error: `S3Guard table lacks version marker.`
@ -1215,6 +1324,97 @@ sync.
See [Fail on Error](#fail-on-error) for more detail.
### Error `RemoteFileChangedException`
An exception like the following could occur for a couple of reasons:
* the S3Guard metadata is out of sync with the true S3 metadata. For
example, the S3Guard DynamoDB table is tracking a different ETag than the ETag
shown in the exception. This may suggest the object was updated in S3 without
involvement from S3Guard or there was a transient failure when S3Guard tried to
write to DynamoDB.
* S3 is exhibiting read-after-overwrite temporary inconsistency. The S3Guard
metadata was updated with a new ETag during a recent write, but the current read
is not seeing that ETag due to S3 eventual consistency. This exception prevents
the reader from an inconsistent read where the reader sees an older version of
the file.
```
org.apache.hadoop.fs.s3a.RemoteFileChangedException: open 's3a://my-bucket/test/file.txt':
Change reported by S3 while reading at position 0.
ETag 4e886e26c072fef250cfaf8037675405 was unavailable
at org.apache.hadoop.fs.s3a.impl.ChangeTracker.processResponse(ChangeTracker.java:167)
at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:207)
at org.apache.hadoop.fs.s3a.S3AInputStream.lambda$lazySeek$1(S3AInputStream.java:355)
at org.apache.hadoop.fs.s3a.Invoker.lambda$retry$2(Invoker.java:195)
at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:109)
at org.apache.hadoop.fs.s3a.Invoker.lambda$retry$3(Invoker.java:265)
at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:322)
at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:261)
at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:193)
at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:215)
at org.apache.hadoop.fs.s3a.S3AInputStream.lazySeek(S3AInputStream.java:348)
at org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:381)
at java.io.FilterInputStream.read(FilterInputStream.java:83)
```
### Error `AWSClientIOException: copyFile` caused by `NullPointerException`
The AWS SDK has an [issue](https://github.com/aws/aws-sdk-java/issues/1644)
where it will throw a relatively generic `AmazonClientException` caused by
`NullPointerException` when copying a file and specifying a precondition
that cannot be met. This can bubble up from `S3AFileSystem.rename()`. It
suggests that the file in S3 is inconsistent with the metadata in S3Guard.
```
org.apache.hadoop.fs.s3a.AWSClientIOException: copyFile(test/rename-eventually2.dat, test/dest2.dat) on test/rename-eventually2.dat: com.amazonaws.AmazonClientException: Unable to complete transfer: null: Unable to complete transfer: null
at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:201)
at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:111)
at org.apache.hadoop.fs.s3a.Invoker.lambda$retry$4(Invoker.java:314)
at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:406)
at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:310)
at org.apache.hadoop.fs.s3a.Invoker.retry(Invoker.java:285)
at org.apache.hadoop.fs.s3a.S3AFileSystem.copyFile(S3AFileSystem.java:3034)
at org.apache.hadoop.fs.s3a.S3AFileSystem.innerRename(S3AFileSystem.java:1258)
at org.apache.hadoop.fs.s3a.S3AFileSystem.rename(S3AFileSystem.java:1119)
at org.apache.hadoop.fs.s3a.ITestS3ARemoteFileChanged.lambda$testRenameEventuallyConsistentFile2$6(ITestS3ARemoteFileChanged.java:556)
at org.apache.hadoop.test.LambdaTestUtils.intercept(LambdaTestUtils.java:498)
at org.apache.hadoop.fs.s3a.ITestS3ARemoteFileChanged.testRenameEventuallyConsistentFile2(ITestS3ARemoteFileChanged.java:554)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
at org.junit.internal.runners.statements.FailOnTimeout$CallableStatement.call(FailOnTimeout.java:298)
at org.junit.internal.runners.statements.FailOnTimeout$CallableStatement.call(FailOnTimeout.java:292)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.lang.Thread.run(Thread.java:748)
Caused by: com.amazonaws.AmazonClientException: Unable to complete transfer: null
at com.amazonaws.services.s3.transfer.internal.AbstractTransfer.unwrapExecutionException(AbstractTransfer.java:286)
at com.amazonaws.services.s3.transfer.internal.AbstractTransfer.rethrowExecutionException(AbstractTransfer.java:265)
at com.amazonaws.services.s3.transfer.internal.CopyImpl.waitForCopyResult(CopyImpl.java:67)
at org.apache.hadoop.fs.s3a.impl.CopyOutcome.waitForCopy(CopyOutcome.java:72)
at org.apache.hadoop.fs.s3a.S3AFileSystem.lambda$copyFile$14(S3AFileSystem.java:3047)
at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:109)
... 25 more
Caused by: java.lang.NullPointerException
at com.amazonaws.services.s3.transfer.internal.CopyCallable.copyInOneChunk(CopyCallable.java:154)
at com.amazonaws.services.s3.transfer.internal.CopyCallable.call(CopyCallable.java:134)
at com.amazonaws.services.s3.transfer.internal.CopyMonitor.call(CopyMonitor.java:132)
at com.amazonaws.services.s3.transfer.internal.CopyMonitor.call(CopyMonitor.java:43)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
... 1 more
```
## Other Topics
For details on how to test S3Guard, see [Testing S3Guard](./testing.html#s3guard)

View File

@ -298,6 +298,24 @@ plugin:
```bash
mvn surefire-report:failsafe-report-only
```
## <a name="versioning"></a> Testing Versioned Stores
Some tests (specifically some in `ITestS3ARemoteFileChanged`) require
a versioned bucket for full test coverage as well as S3Guard being enabled.
To enable versioning in a bucket.
1. In the AWS S3 Management console find and select the bucket.
1. In the Properties "tab", set it as versioned.
1. <i>Important</i> Create a lifecycle rule to automatically clean up old versions
after 24h. This avoids running up bills for objects which tests runs create and
then delete.
1. Run the tests again.
Once a bucket is converted to being versioned, it cannot be converted back
to being unversioned.
## <a name="scale"></a> Scale Tests
There are a set of tests designed to measure the scalability and performance

View File

@ -970,8 +970,8 @@ and the like. The standard strategy here is to save to HDFS and then copy to S3.
```
org.apache.hadoop.fs.s3a.RemoteFileChangedException: re-open `s3a://my-bucket/test/file.txt':
ETag change reported by S3 while reading at position 1949.
Version f9c186d787d4de9657e99f280ba26555 was unavailable
Change reported by S3 while reading at position 1949.
ETag f9c186d787d4de9657e99f280ba26555 was unavailable
at org.apache.hadoop.fs.s3a.impl.ChangeTracker.processResponse(ChangeTracker.java:137)
at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:200)
at org.apache.hadoop.fs.s3a.S3AInputStream.lambda$lazySeek$1(S3AInputStream.java:346)

View File

@ -56,19 +56,26 @@ public abstract class AbstractS3AMockTest {
@Before
public void setup() throws Exception {
Configuration conf = createConfiguration();
fs = new S3AFileSystem();
URI uri = URI.create(FS_S3A + "://" + BUCKET);
fs.initialize(uri, conf);
s3 = fs.getAmazonS3ClientForTesting("mocking");
}
public Configuration createConfiguration() {
Configuration conf = new Configuration();
conf.setClass(S3_CLIENT_FACTORY_IMPL, MockS3ClientFactory.class,
S3ClientFactory.class);
// We explicitly disable MetadataStore even if it's configured. For unit
// We explicitly disable MetadataStore. For unit
// test we don't issue request to AWS DynamoDB service.
conf.setClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class,
MetadataStore.class);
// FS is always magic
conf.setBoolean(CommitConstants.MAGIC_COMMITTER_ENABLED, true);
fs = new S3AFileSystem();
URI uri = URI.create(FS_S3A + "://" + BUCKET);
fs.initialize(uri, conf);
s3 = fs.getAmazonS3ClientForTesting("mocking");
// use minimum multipart size for faster triggering
conf.setLong(Constants.MULTIPART_SIZE, MULTIPART_MIN_SIZE);
return conf;
}
@After

View File

@ -22,7 +22,11 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source;
import org.apache.hadoop.test.LambdaTestUtils;
import org.junit.Assume;
import org.junit.Test;
import java.io.FileNotFoundException;
@ -43,6 +47,12 @@ public class ITestS3ADelayedFNF extends AbstractS3ATestBase {
@Test
public void testNotFoundFirstRead() throws Exception {
FileSystem fs = getFileSystem();
ChangeDetectionPolicy changeDetectionPolicy =
((S3AFileSystem) fs).getChangeDetectionPolicy();
Assume.assumeFalse("FNF not expected when using a bucket with"
+ " object versioning",
changeDetectionPolicy.getSource() == Source.VersionId);
Path p = path("some-file");
ContractTestUtils.createFile(fs, p, false, new byte[] {20, 21, 22});

View File

@ -24,9 +24,13 @@
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.contract.s3a.S3AContract;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore;
import org.apache.hadoop.test.LambdaTestUtils;
import org.junit.Assume;
import org.junit.Test;
import java.io.FileNotFoundException;
@ -106,6 +110,12 @@ public void testGetFileStatus() throws Exception {
@Test
public void testOpenDeleteRead() throws Exception {
S3AFileSystem fs = getFileSystem();
ChangeDetectionPolicy changeDetectionPolicy =
((S3AFileSystem) fs).getChangeDetectionPolicy();
Assume.assumeFalse("FNF not expected when using a bucket with"
+ " object versioning",
changeDetectionPolicy.getSource() == Source.VersionId);
Path p = path("testOpenDeleteRead.txt");
writeTextFile(fs, p, "1337c0d3z", true);
try (InputStream s = fs.open(p)) {

View File

@ -30,6 +30,7 @@
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.s3a.S3AContract;
import com.google.common.collect.Lists;
import org.junit.Assume;
import org.junit.Test;
@ -253,7 +254,7 @@ public void testConsistentRenameAfterDelete() throws Exception {
DEFAULT_DELAY_KEY_SUBSTRING)));
try {
RemoteIterator<LocatedFileStatus> old = fs.listFilesAndEmptyDirectories(
RemoteIterator<S3ALocatedFileStatus> old = fs.listFilesAndEmptyDirectories(
path("a"), true);
fail("Recently renamed dir should not be visible");
} catch(FileNotFoundException e) {
@ -557,6 +558,44 @@ public void testInconsistentS3ClientDeletes() throws Throwable {
);
}
/**
* Tests that the file's eTag and versionId are preserved in recursive
* listings.
*/
@Test
public void testListingReturnsVersionMetadata() throws Throwable {
S3AFileSystem fs = getFileSystem();
Assume.assumeTrue(fs.hasMetadataStore());
// write simple file
Path file = path("file1");
try (FSDataOutputStream outputStream = fs.create(file)) {
outputStream.writeChars("hello");
}
// get individual file status
FileStatus[] fileStatuses = fs.listStatus(file);
assertEquals(1, fileStatuses.length);
S3AFileStatus status = (S3AFileStatus) fileStatuses[0];
String eTag = status.getETag();
String versionId = status.getVersionId();
// get status through recursive directory listing
RemoteIterator<LocatedFileStatus> filesIterator = fs.listFiles(
file.getParent(), true);
List<LocatedFileStatus> files = Lists.newArrayList();
while (filesIterator.hasNext()) {
files.add(filesIterator.next());
}
assertEquals(1, files.size());
// ensure eTag and versionId are preserved in directory listing
S3ALocatedFileStatus locatedFileStatus =
(S3ALocatedFileStatus) files.get(0);
assertEquals(eTag, locatedFileStatus.getETag());
assertEquals(versionId, locatedFileStatus.getVersionId());
}
/**
* Assert that the two list sizes match; failure message includes the lists.
* @param message text for the assertion

View File

@ -26,6 +26,7 @@
import java.util.UUID;
import java.util.stream.Collectors;
import org.junit.Assume;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
@ -37,6 +38,8 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source;
import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
@ -243,6 +246,12 @@ public void testLongerLengthOverwrite() throws Exception {
@Test
public void testOutOfBandDeletes() throws Exception {
ChangeDetectionPolicy changeDetectionPolicy =
((S3AFileSystem) getFileSystem()).getChangeDetectionPolicy();
Assume.assumeFalse("FNF not expected when using a bucket with"
+ " object versioning",
changeDetectionPolicy.getSource() == Source.VersionId);
Path testFileName = path("OutOfBandDelete-" + UUID.randomUUID());
outOfBandDeletes(testFileName, authoritative);
}

View File

@ -177,7 +177,7 @@ public boolean exists(Path f) throws IOException {
}
@Override
void finishedWrite(String key, long length) {
void finishedWrite(String key, long length, String eTag, String versionId) {
}

View File

@ -1042,30 +1042,25 @@ public static void verifyFileStatus(FileStatus status,
* Verify the status entry of a directory matches that expected.
* @param status status entry to check
* @param replication replication factor
* @param modTime modified time
* @param accessTime access time
* @param owner owner
* @param group user group
* @param permission permission.
*/
public static void verifyDirStatus(FileStatus status,
public static void verifyDirStatus(S3AFileStatus status,
int replication,
long modTime,
long accessTime,
String owner,
String group,
FsPermission permission) {
String owner) {
String details = status.toString();
assertTrue("Is a dir: " + details, status.isDirectory());
assertEquals("zero length: " + details, 0, status.getLen());
assertEquals("Mod time: " + details, modTime, status.getModificationTime());
// S3AFileStatus always assigns modTime = System.currentTimeMillis()
assertTrue("Mod time: " + details, status.getModificationTime() > 0);
assertEquals("Replication value: " + details, replication,
status.getReplication());
assertEquals("Access time: " + details, accessTime, status.getAccessTime());
assertEquals("Access time: " + details, 0, status.getAccessTime());
assertEquals("Owner: " + details, owner, status.getOwner());
assertEquals("Group: " + details, group, status.getGroup());
assertEquals("Permission: " + details, permission, status.getPermission());
// S3AFileStatus always assigns group=owner
assertEquals("Group: " + details, owner, status.getGroup());
// S3AFileStatus always assigns permission = default
assertEquals("Permission: " + details,
FsPermission.getDefault(), status.getPermission());
}
/**

View File

@ -19,7 +19,6 @@
package org.apache.hadoop.fs.s3a;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Assert;
@ -40,11 +39,11 @@
*/
public class TestListing extends AbstractS3AMockTest {
private static class MockRemoteIterator<FileStatus> implements
RemoteIterator<FileStatus> {
private Iterator<FileStatus> iterator;
private static class MockRemoteIterator<S3AFileStatus> implements
RemoteIterator<S3AFileStatus> {
private Iterator<S3AFileStatus> iterator;
MockRemoteIterator(Collection<FileStatus> source) {
MockRemoteIterator(Collection<S3AFileStatus> source) {
iterator = source.iterator();
}
@ -52,13 +51,13 @@ public boolean hasNext() {
return iterator.hasNext();
}
public FileStatus next() {
public S3AFileStatus next() {
return iterator.next();
}
}
private FileStatus blankFileStatus(Path path) {
return new FileStatus(0, true, 0, 0, 0, path);
private S3AFileStatus blankFileStatus(Path path) {
return new S3AFileStatus(Tristate.UNKNOWN, path, null);
}
@Test
@ -78,11 +77,11 @@ public void testTombstoneReconcilingIterator() throws Exception {
Set<Path> tombstones = new HashSet<>();
tombstones.add(deletedChild);
RemoteIterator<FileStatus> sourceIterator = new MockRemoteIterator(
RemoteIterator<S3AFileStatus> sourceIterator = new MockRemoteIterator(
statuses);
RemoteIterator<LocatedFileStatus> locatedIterator =
RemoteIterator<S3ALocatedFileStatus> locatedIterator =
listing.createLocatedFileStatusIterator(sourceIterator);
RemoteIterator<LocatedFileStatus> reconcilingIterator =
RemoteIterator<S3ALocatedFileStatus> reconcilingIterator =
listing.createTombstoneReconcilingIterator(locatedIterator, tombstones);
Set<Path> expectedPaths = new HashSet<>();
@ -98,8 +97,12 @@ public void testTombstoneReconcilingIterator() throws Exception {
@Test
public void testProvidedFileStatusIteratorEnd() throws Exception {
FileStatus[] statuses = {
new FileStatus(100, false, 1, 8192, 0, new Path("s3a://blah/blah"))
S3AFileStatus s3aStatus = new S3AFileStatus(
100, 0, new Path("s3a://blah/blah"),
8192, null, null, null);
S3AFileStatus[] statuses = {
s3aStatus
};
ProvidedFileStatusIterator it = new ProvidedFileStatusIterator(statuses,
ACCEPT_ALL, new Listing.AcceptAllButS3nDirs());

View File

@ -20,18 +20,23 @@
import java.util.concurrent.atomic.AtomicLong;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.SdkBaseException;
import com.amazonaws.services.s3.Headers;
import com.amazonaws.services.s3.model.CopyObjectRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.S3Object;
import org.apache.hadoop.fs.PathIOException;
import com.amazonaws.services.s3.transfer.model.CopyResult;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy;
import org.apache.hadoop.fs.s3a.impl.ChangeTracker;
import org.apache.hadoop.test.HadoopTestBase;
import org.apache.http.HttpStatus;
import static org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.CHANGE_DETECTED;
import static org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.createPolicy;
@ -50,6 +55,8 @@ public class TestStreamChangeTracker extends HadoopTestBase {
public static final String OBJECT = "object";
public static final String DEST_OBJECT = "new_object";
public static final String URI = "s3a://" + BUCKET + "/" + OBJECT;
@Test
@ -161,12 +168,108 @@ public void testVersionCheckingOnServer() throws Throwable {
CHANGE_DETECTED);
}
@Test
public void testVersionCheckingUpfrontETag() throws Throwable {
ChangeTracker tracker = newTracker(
ChangeDetectionPolicy.Mode.Server,
ChangeDetectionPolicy.Source.ETag,
false,
objectAttributes("etag1", "versionid1"));
assertEquals("etag1", tracker.getRevisionId());
}
@Test
public void testVersionCheckingUpfrontVersionId() throws Throwable {
ChangeTracker tracker = newTracker(
ChangeDetectionPolicy.Mode.Server,
ChangeDetectionPolicy.Source.VersionId,
false,
objectAttributes("etag1", "versionid1"));
assertEquals("versionid1", tracker.getRevisionId());
}
@Test
public void testVersionCheckingETagCopyServer() throws Throwable {
ChangeTracker tracker = newTracker(
ChangeDetectionPolicy.Mode.Server,
ChangeDetectionPolicy.Source.VersionId,
false,
objectAttributes("etag1", "versionid1"));
assertConstraintApplied(tracker, newCopyObjectRequest());
}
@Test
public void testVersionCheckingETagCopyClient() throws Throwable {
ChangeTracker tracker = newTracker(
ChangeDetectionPolicy.Mode.Client,
ChangeDetectionPolicy.Source.VersionId,
false,
objectAttributes("etag1", "versionid1"));
assertFalse("Tracker should not have applied contraints " + tracker,
tracker.maybeApplyConstraint(newCopyObjectRequest()));
}
@Test
public void testCopyVersionIdRequired() throws Throwable {
ChangeTracker tracker = newTracker(
ChangeDetectionPolicy.Mode.Client,
ChangeDetectionPolicy.Source.VersionId,
true,
objectAttributes("etag1", "versionId"));
expectNoVersionAttributeException(tracker, newCopyResult("etag1",
null),
"policy requires VersionId");
}
@Test
public void testCopyETagRequired() throws Throwable {
ChangeTracker tracker = newTracker(
ChangeDetectionPolicy.Mode.Client,
ChangeDetectionPolicy.Source.ETag,
true,
objectAttributes("etag1", "versionId"));
expectNoVersionAttributeException(tracker, newCopyResult(null,
"versionId"),
"policy requires ETag");
}
@Test
public void testCopyVersionMismatch() throws Throwable {
ChangeTracker tracker = newTracker(
ChangeDetectionPolicy.Mode.Server,
ChangeDetectionPolicy.Source.ETag,
true,
objectAttributes("etag", "versionId"));
// 412 is translated to RemoteFileChangedException
// note: this scenario is never currently hit due to
// https://github.com/aws/aws-sdk-java/issues/1644
AmazonServiceException awsException =
new AmazonServiceException("aws exception");
awsException.setStatusCode(HttpStatus.SC_PRECONDITION_FAILED);
expectChangeException(tracker, awsException, "copy",
RemoteFileChangedException.PRECONDITIONS_FAILED);
// processing another type of exception does nothing
tracker.processException(new SdkBaseException("foo"), "copy");
}
protected void assertConstraintApplied(final ChangeTracker tracker,
final GetObjectRequest request) {
assertTrue("Tracker should have applied contraints " + tracker,
tracker.maybeApplyConstraint(request));
}
protected void assertConstraintApplied(final ChangeTracker tracker,
final CopyObjectRequest request) throws PathIOException {
assertTrue("Tracker should have applied contraints " + tracker,
tracker.maybeApplyConstraint(request));
}
protected RemoteFileChangedException expectChangeException(
final ChangeTracker tracker,
final S3Object response,
@ -175,6 +278,15 @@ protected RemoteFileChangedException expectChangeException(
RemoteFileChangedException.class);
}
protected RemoteFileChangedException expectChangeException(
final ChangeTracker tracker,
final SdkBaseException exception,
final String operation,
final String message) throws Exception {
return expectException(tracker, exception, operation, message,
RemoteFileChangedException.class);
}
protected PathIOException expectNoVersionAttributeException(
final ChangeTracker tracker,
final S3Object response,
@ -183,6 +295,14 @@ protected PathIOException expectNoVersionAttributeException(
NoVersionAttributeException.class);
}
protected PathIOException expectNoVersionAttributeException(
final ChangeTracker tracker,
final CopyResult response,
final String message) throws Exception {
return expectException(tracker, response, message,
NoVersionAttributeException.class);
}
protected <T extends Exception> T expectException(
final ChangeTracker tracker,
final S3Object response,
@ -197,6 +317,35 @@ protected <T extends Exception> T expectException(
});
}
protected <T extends Exception> T expectException(
final ChangeTracker tracker,
final CopyResult response,
final String message,
final Class<T> clazz) throws Exception {
return intercept(
clazz,
message,
() -> {
tracker.processResponse(response);
return tracker;
});
}
protected <T extends Exception> T expectException(
final ChangeTracker tracker,
final SdkBaseException exception,
final String operation,
final String message,
final Class<T> clazz) throws Exception {
return intercept(
clazz,
message,
() -> {
tracker.processException(exception, operation);
return tracker;
});
}
protected void assertRevisionId(final ChangeTracker tracker,
final String revId) {
assertEquals("Wrong revision ID in " + tracker,
@ -218,14 +367,29 @@ protected void assertTrackerMismatchCount(
*/
protected ChangeTracker newTracker(final ChangeDetectionPolicy.Mode mode,
final ChangeDetectionPolicy.Source source, boolean requireVersion) {
return newTracker(mode, source, requireVersion,
objectAttributes(null, null));
}
/**
* Create tracker.
* Contains standard assertions(s).
* @return the tracker.
*/
protected ChangeTracker newTracker(final ChangeDetectionPolicy.Mode mode,
final ChangeDetectionPolicy.Source source, boolean requireVersion,
S3ObjectAttributes objectAttributes) {
ChangeDetectionPolicy policy = createPolicy(
mode,
source,
requireVersion);
ChangeTracker tracker = new ChangeTracker(URI, policy,
new AtomicLong(0));
new AtomicLong(0), objectAttributes);
if (objectAttributes.getVersionId() == null
&& objectAttributes.getETag() == null) {
assertFalse("Tracker should not have applied constraints " + tracker,
tracker.maybeApplyConstraint(newGetObjectRequest()));
}
return tracker;
}
@ -233,6 +397,21 @@ private GetObjectRequest newGetObjectRequest() {
return new GetObjectRequest(BUCKET, OBJECT);
}
private CopyObjectRequest newCopyObjectRequest() {
return new CopyObjectRequest(BUCKET, OBJECT, BUCKET, DEST_OBJECT);
}
private CopyResult newCopyResult(String eTag, String versionId) {
CopyResult copyResult = new CopyResult();
copyResult.setSourceBucketName(BUCKET);
copyResult.setSourceKey(OBJECT);
copyResult.setDestinationBucketName(BUCKET);
copyResult.setDestinationKey(DEST_OBJECT);
copyResult.setETag(eTag);
copyResult.setVersionId(versionId);
return copyResult;
}
private S3Object newResponse(String etag, String versionId) {
ObjectMetadata md = new ObjectMetadata();
if (etag != null) {
@ -252,4 +431,14 @@ private S3Object emptyResponse() {
response.setKey(OBJECT);
return response;
}
private S3ObjectAttributes objectAttributes(
String etag, String versionId) {
return new S3ObjectAttributes(BUCKET,
OBJECT,
null,
null,
etag,
versionId);
}
}

View File

@ -221,7 +221,7 @@ protected void createFile(Path path, boolean onS3, boolean onMetadataStore)
ContractTestUtils.touch(fs, path);
} else if (onMetadataStore) {
S3AFileStatus status = new S3AFileStatus(100L, System.currentTimeMillis(),
fs.qualify(path), 512L, "hdfs");
fs.qualify(path), 512L, "hdfs", null, null);
putFile(ms, status);
}
}

View File

@ -246,12 +246,13 @@ public DynamoDBMSContract createContract(Configuration conf) {
}
@Override
FileStatus basicFileStatus(Path path, int size, boolean isDir)
S3AFileStatus basicFileStatus(Path path, int size, boolean isDir)
throws IOException {
String owner = UserGroupInformation.getCurrentUser().getShortUserName();
return isDir
? new S3AFileStatus(true, path, owner)
: new S3AFileStatus(size, getModTime(), path, BLOCK_SIZE, owner);
: new S3AFileStatus(size, getModTime(), path, BLOCK_SIZE, owner,
null, null);
}
private DynamoDBMetadataStore getDynamoMetadataStore() throws IOException {

View File

@ -41,7 +41,6 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.fs.contract.ContractTestUtils;
@ -231,7 +230,7 @@ public void test_030_BatchedWrite() throws Exception {
long pruneItems = 0;
for (long i = 0; i < iterations; i++) {
Path longPath = pathOfDepth(BATCH_SIZE, String.valueOf(i));
FileStatus status = basicFileStatus(longPath, 0, false, 12345,
S3AFileStatus status = basicFileStatus(longPath, 0, false,
12345);
PathMetadata pm = new PathMetadata(status);
synchronized (toCleanup) {

View File

@ -37,7 +37,9 @@
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.Tristate;
import static org.apache.hadoop.fs.s3a.MultipartTestUtils.*;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.getLandsatCSVFile;
@ -95,6 +97,41 @@ public void testImportCommand() throws Exception {
// assertTrue(children.isAuthoritative());
}
@Test
public void testImportCommandRepairsETagAndVersionId() throws Exception {
S3AFileSystem fs = getFileSystem();
MetadataStore ms = getMetadataStore();
Path path = path("test-version-metadata");
try (FSDataOutputStream out = fs.create(path)) {
out.write(1);
}
S3AFileStatus originalStatus = (S3AFileStatus) fs.getFileStatus(path);
// put in bogus ETag and versionId
S3AFileStatus bogusStatus = S3AFileStatus.fromFileStatus(originalStatus,
Tristate.FALSE, "bogusETag", "bogusVersionId");
ms.put(new PathMetadata(bogusStatus));
// sanity check that bogus status is actually persisted
S3AFileStatus retrievedBogusStatus = (S3AFileStatus) fs.getFileStatus(path);
assertEquals("bogus ETag was not persisted",
"bogusETag", retrievedBogusStatus.getETag());
assertEquals("bogus versionId was not persisted",
"bogusVersionId", retrievedBogusStatus.getVersionId());
// execute the import
S3GuardTool.Import cmd = new S3GuardTool.Import(fs.getConf());
cmd.setStore(ms);
exec(cmd, "import", path.toString());
// make sure ETag and versionId were corrected
S3AFileStatus updatedStatus = (S3AFileStatus) fs.getFileStatus(path);
assertEquals("ETag was not corrected",
originalStatus.getETag(), updatedStatus.getETag());
assertEquals("VersionId was not corrected",
originalStatus.getVersionId(), updatedStatus.getVersionId());
}
@Test
public void testDestroyBucketExistsButNoTable() throws Throwable {
run(Destroy.NAME,

View File

@ -38,6 +38,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.apache.hadoop.fs.s3a.Tristate;
import org.apache.hadoop.io.IOUtils;
@ -61,11 +62,13 @@ public abstract class MetadataStoreTestBase extends HadoopTestBase {
/** Some dummy values for sanity-checking FileStatus contents. */
static final long BLOCK_SIZE = 32 * 1024 * 1024;
static final int REPLICATION = 1;
static final FsPermission PERMISSION = new FsPermission((short)0755);
static final String OWNER = "bob";
static final String GROUP = "uncles";
private final long accessTime = System.currentTimeMillis();
private final long modTime = accessTime - 5000;
private final long modTime = System.currentTimeMillis() - 5000;
// attributes not supported by S3AFileStatus
static final FsPermission PERMISSION = null;
static final String GROUP = null;
private final long accessTime = 0;
/**
* Each test should override this. Will use a new Configuration instance.
@ -148,14 +151,14 @@ private void doTestDescendantsIterator(
String[] checkNodes) throws Exception {
// we set up the example file system tree in metadata store
for (String pathStr : createNodes) {
final FileStatus status = pathStr.contains("file")
final S3AFileStatus status = pathStr.contains("file")
? basicFileStatus(strToPath(pathStr), 100, false)
: basicFileStatus(strToPath(pathStr), 0, true);
ms.put(new PathMetadata(status));
}
final PathMetadata rootMeta = new PathMetadata(makeDirStatus("/"));
RemoteIterator<FileStatus> iterator;
RemoteIterator<S3AFileStatus> iterator;
if (implementation == DescendantsIterator.class) {
iterator = new DescendantsIterator(ms, rootMeta);
} else if (implementation == MetadataStoreListFilesIterator.class) {
@ -647,8 +650,7 @@ public void testPruneFiles() throws Exception {
createNewDirs("/pruneFiles");
long oldTime = getTime();
ms.put(new PathMetadata(makeFileStatus("/pruneFiles/old", 1, oldTime,
oldTime)));
ms.put(new PathMetadata(makeFileStatus("/pruneFiles/old", 1, oldTime)));
DirListingMetadata ls2 = ms.listChildren(strToPath("/pruneFiles"));
if (!allowMissing()) {
assertListingsEqual(ls2.getListing(), "/pruneFiles/old");
@ -659,8 +661,7 @@ public void testPruneFiles() throws Exception {
Thread.sleep(1);
long cutoff = System.currentTimeMillis();
long newTime = getTime();
ms.put(new PathMetadata(makeFileStatus("/pruneFiles/new", 1, newTime,
newTime)));
ms.put(new PathMetadata(makeFileStatus("/pruneFiles/new", 1, newTime)));
DirListingMetadata ls;
ls = ms.listChildren(strToPath("/pruneFiles"));
@ -691,7 +692,7 @@ public void testPruneDirs() throws Exception {
long oldTime = getTime();
ms.put(new PathMetadata(makeFileStatus("/pruneDirs/dir/file",
1, oldTime, oldTime)));
1, oldTime)));
// It's possible for the Local implementation to get from the old
// modification time to here in under 1ms, causing it to not get pruned
@ -715,10 +716,10 @@ public void testPruneUnsetsAuthoritative() throws Exception {
createNewDirs(rootDir, grandparentDir, parentDir);
long time = System.currentTimeMillis();
ms.put(new PathMetadata(
new FileStatus(0, false, 0, 0, time - 1, strToPath(staleFile)),
basicFileStatus(0, false, 0, time - 1, strToPath(staleFile)),
Tristate.FALSE, false));
ms.put(new PathMetadata(
new FileStatus(0, false, 0, 0, time + 1, strToPath(freshFile)),
basicFileStatus(0, false, 0, time + 1, strToPath(freshFile)),
Tristate.FALSE, false));
// set parent dir as authoritative
@ -752,10 +753,10 @@ public void testPrunePreservesAuthoritative() throws Exception {
createNewDirs(rootDir, grandparentDir, parentDir);
long time = System.currentTimeMillis();
ms.put(new PathMetadata(
new FileStatus(0, false, 0, 0, time + 1, strToPath(staleFile)),
basicFileStatus(0, false, 0, time + 1, strToPath(staleFile)),
Tristate.FALSE, false));
ms.put(new PathMetadata(
new FileStatus(0, false, 0, 0, time + 1, strToPath(freshFile)),
basicFileStatus(0, false, 0, time + 1, strToPath(freshFile)),
Tristate.FALSE, false));
if (!allowMissing()) {
@ -811,7 +812,7 @@ public void testPutDirListingMetadataPutsFileMetadata()
@Test
public void testPutRetainsIsDeletedInParentListing() throws Exception {
final Path path = strToPath("/a/b");
final FileStatus fileStatus = basicFileStatus(path, 0, false);
final S3AFileStatus fileStatus = basicFileStatus(path, 0, false);
PathMetadata pm = new PathMetadata(fileStatus);
pm.setIsDeleted(true);
ms.put(pm);
@ -948,40 +949,54 @@ private void assertEmptyDirs(String ...dirs) throws IOException {
}
}
FileStatus basicFileStatus(Path path, int size, boolean isDir) throws
S3AFileStatus basicFileStatus(Path path, int size, boolean isDir) throws
IOException {
return basicFileStatus(path, size, isDir, modTime, accessTime);
return basicFileStatus(path, size, isDir, modTime);
}
public static FileStatus basicFileStatus(Path path, int size, boolean isDir,
long newModTime, long newAccessTime) throws IOException {
return new FileStatus(size, isDir, REPLICATION, BLOCK_SIZE, newModTime,
newAccessTime, PERMISSION, OWNER, GROUP, path);
S3AFileStatus basicFileStatus(int size, boolean isDir,
long blockSize, long modificationTime, Path path) {
if (isDir) {
return new S3AFileStatus(Tristate.UNKNOWN, path, null);
} else {
return new S3AFileStatus(size, modificationTime, path, blockSize, null,
null, null);
}
}
private FileStatus makeFileStatus(String pathStr, int size) throws
public static S3AFileStatus basicFileStatus(Path path, int size,
boolean isDir, long newModTime) throws IOException {
if (isDir) {
return new S3AFileStatus(Tristate.UNKNOWN, path, OWNER);
} else {
return new S3AFileStatus(size, newModTime, path, BLOCK_SIZE, OWNER,
null, null);
}
}
private S3AFileStatus makeFileStatus(String pathStr, int size) throws
IOException {
return makeFileStatus(pathStr, size, modTime, accessTime);
return makeFileStatus(pathStr, size, modTime);
}
private FileStatus makeFileStatus(String pathStr, int size, long newModTime,
long newAccessTime) throws IOException {
private S3AFileStatus makeFileStatus(String pathStr, int size,
long newModTime) throws IOException {
return basicFileStatus(strToPath(pathStr), size, false,
newModTime, newAccessTime);
newModTime);
}
void verifyFileStatus(FileStatus status, long size) {
S3ATestUtils.verifyFileStatus(status, size, BLOCK_SIZE, modTime);
}
private FileStatus makeDirStatus(String pathStr) throws IOException {
return basicFileStatus(strToPath(pathStr), 0, true, modTime, accessTime);
private S3AFileStatus makeDirStatus(String pathStr) throws IOException {
return basicFileStatus(strToPath(pathStr), 0, true, modTime);
}
/**
* Verify the directory file status. Subclass may verify additional fields.
*/
void verifyDirStatus(FileStatus status) {
void verifyDirStatus(S3AFileStatus status) {
assertTrue("Is a dir", status.isDirectory());
assertEquals("zero length", 0, status.getLen());
}

View File

@ -38,6 +38,8 @@
public class TestDirListingMetadata {
private static final String TEST_OWNER = "hadoop";
public static final String TEST_ETAG = "abc";
public static final String TEST_VERSION_ID = "def";
@Rule
public ExpectedException exception = ExpectedException.none();
@ -79,7 +81,8 @@ public void testListing() {
PathMetadata pathMeta2 = new PathMetadata(
new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
PathMetadata pathMeta3 = new PathMetadata(
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER,
TEST_ETAG, TEST_VERSION_ID));
List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
DirListingMetadata meta = new DirListingMetadata(path, listing, false);
assertEquals(path, meta.getPath());
@ -130,7 +133,8 @@ public void testGet() {
PathMetadata pathMeta2 = new PathMetadata(
new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
PathMetadata pathMeta3 = new PathMetadata(
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER,
TEST_ETAG, TEST_VERSION_ID));
List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
DirListingMetadata meta = new DirListingMetadata(path, listing, false);
assertEquals(path, meta.getPath());
@ -181,7 +185,8 @@ public void testPut() {
PathMetadata pathMeta2 = new PathMetadata(
new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
PathMetadata pathMeta3 = new PathMetadata(
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER,
TEST_ETAG, TEST_VERSION_ID));
List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
DirListingMetadata meta = new DirListingMetadata(path, listing, false);
assertEquals(path, meta.getPath());
@ -243,7 +248,8 @@ public void testRemove() {
PathMetadata pathMeta2 = new PathMetadata(
new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
PathMetadata pathMeta3 = new PathMetadata(
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER,
TEST_ETAG, TEST_VERSION_ID));
List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
DirListingMetadata meta = new DirListingMetadata(path, listing, false);
assertEquals(path, meta.getPath());
@ -296,7 +302,7 @@ private static DirListingMetadata makeTwoDirsOneFile(Path parent) {
new S3AFileStatus(true, new Path(parent, "dir2"), TEST_OWNER));
PathMetadata pathMeta3 = new PathMetadata(
new S3AFileStatus(123, 456, new Path(parent, "file1"), 8192,
TEST_OWNER));
TEST_OWNER, TEST_ETAG, TEST_VERSION_ID));
List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
return new DirListingMetadata(parent, listing, false);
}

View File

@ -30,7 +30,9 @@
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.apache.hadoop.fs.s3a.Tristate;
/**
* MetadataStore unit test for {@link LocalMetadataStore}.
@ -168,8 +170,8 @@ private static void populateMap(Cache<Path, LocalMetadataEntry> cache,
private static void populateEntry(Cache<Path, LocalMetadataEntry> cache,
Path path) {
FileStatus fileStatus = new FileStatus(0, true, 0, 0, 0, path);
cache.put(path, new LocalMetadataEntry(new PathMetadata(fileStatus)));
S3AFileStatus s3aStatus = new S3AFileStatus(Tristate.UNKNOWN, path, null);
cache.put(path, new LocalMetadataEntry(new PathMetadata(s3aStatus)));
}
private static long sizeOfMap(Cache<Path, LocalMetadataEntry> cache) {
@ -196,9 +198,8 @@ protected void verifyFileStatus(FileStatus status, long size) {
}
@Override
protected void verifyDirStatus(FileStatus status) {
S3ATestUtils.verifyDirStatus(status, REPLICATION, getModTime(),
getAccessTime(), OWNER, GROUP, PERMISSION);
protected void verifyDirStatus(S3AFileStatus status) {
S3ATestUtils.verifyDirStatus(status, REPLICATION, OWNER);
}
}

View File

@ -0,0 +1,380 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.s3guard;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import com.amazonaws.services.s3.Headers;
import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
import com.amazonaws.services.s3.model.CompleteMultipartUploadResult;
import com.amazonaws.services.s3.model.GetObjectMetadataRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadResult;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.ListObjectsV2Result;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.PutObjectResult;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.UploadPartRequest;
import com.amazonaws.services.s3.model.UploadPartResult;
import org.hamcrest.BaseMatcher;
import org.hamcrest.Description;
import org.hamcrest.Matcher;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.AbstractS3AMockTest;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE;
import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_MODE_SERVER;
import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE;
import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE_ETAG;
import static org.apache.hadoop.fs.s3a.Constants.CHANGE_DETECT_SOURCE_VERSION_ID;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.when;
import static org.mockito.hamcrest.MockitoHamcrest.argThat;
/**
* Unit tests to ensure object eTag and versionId are captured on S3 PUT and
* used on GET.
* Further (integration) testing is performed in
* {@link org.apache.hadoop.fs.s3a.ITestS3ARemoteFileChanged}.
*/
@RunWith(Parameterized.class)
public class TestObjectChangeDetectionAttributes extends AbstractS3AMockTest {
private final String changeDetectionSource;
public TestObjectChangeDetectionAttributes(String changeDetectionSource) {
this.changeDetectionSource = changeDetectionSource;
}
@Parameterized.Parameters(name = "change={0}")
public static Collection<Object[]> params() {
return Arrays.asList(new Object[][]{
{CHANGE_DETECT_SOURCE_ETAG},
{CHANGE_DETECT_SOURCE_VERSION_ID}
});
}
@Override
public Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
conf.setClass(Constants.S3_METADATA_STORE_IMPL,
LocalMetadataStore.class, MetadataStore.class);
conf.set(CHANGE_DETECT_SOURCE, changeDetectionSource);
conf.set(CHANGE_DETECT_MODE, CHANGE_DETECT_MODE_SERVER);
return conf;
}
/**
* Tests a file uploaded with a single PUT to ensure eTag is captured and used
* on file read.
*/
@Test
public void testCreateAndReadFileSinglePart() throws Exception {
String bucket = "s3a://mock-bucket/";
String file = "single-part-file";
Path path = new Path(bucket, file);
byte[] content = "content".getBytes();
String eTag = "abc";
String versionId = "def";
putObject(file, path, content, eTag, versionId);
// make sure the eTag and versionId were put into the metadataStore
assertVersionAttributes(path, eTag, versionId);
// Ensure underlying S3 getObject call uses the stored eTag or versionId
// when reading data back. If it doesn't, the read won't work and the
// assert will fail.
assertContent(file, path, content, eTag, versionId);
// test overwrite
byte[] newConent = "newcontent".getBytes();
String newETag = "newETag";
String newVersionId = "newVersionId";
putObject(file, path, newConent, newETag, newVersionId);
assertVersionAttributes(path, newETag, newVersionId);
assertContent(file, path, newConent, newETag, newVersionId);
}
/**
* Tests a file uploaded with multi-part upload to ensure eTag is captured
* and used on file read.
*/
@Test
public void testCreateAndReadFileMultiPart() throws Exception {
String bucket = "s3a://mock-bucket/";
String file = "multi-part-file";
Path path = new Path(bucket, file);
byte[] content = new byte[Constants.MULTIPART_MIN_SIZE + 1];
String eTag = "abc";
String versionId = "def";
multipartUpload(file, path, content, eTag, versionId);
// make sure the eTag and versionId were put into the metadataStore
assertVersionAttributes(path, eTag, versionId);
// Ensure underlying S3 getObject call uses the stored eTag or versionId
// when reading data back. If it doesn't, the read won't work and the
// assert will fail.
assertContent(file, path, content, eTag, versionId);
// test overwrite
byte[] newContent = new byte[Constants.MULTIPART_MIN_SIZE + 1];
Arrays.fill(newContent, (byte) 1);
String newETag = "newETag";
String newVersionId = "newVersionId";
multipartUpload(file, path, newContent, newETag, newVersionId);
assertVersionAttributes(path, newETag, newVersionId);
assertContent(file, path, newContent, newETag, newVersionId);
}
private void putObject(String file, Path path, byte[] content,
String eTag, String versionId) throws IOException {
PutObjectResult putObjectResult = new PutObjectResult();
ObjectMetadata objectMetadata = new ObjectMetadata();
objectMetadata.setContentLength(content.length);
putObjectResult.setMetadata(objectMetadata);
putObjectResult.setETag(eTag);
putObjectResult.setVersionId(versionId);
when(s3.getObjectMetadata(any(GetObjectMetadataRequest.class)))
.thenThrow(NOT_FOUND);
when(s3.putObject(argThat(correctPutObjectRequest(file))))
.thenReturn(putObjectResult);
ListObjectsV2Result emptyListing = new ListObjectsV2Result();
when(s3.listObjectsV2(argThat(correctListObjectsRequest(file + "/"))))
.thenReturn(emptyListing);
FSDataOutputStream outputStream = fs.create(path);
outputStream.write(content);
outputStream.close();
}
private void multipartUpload(String file, Path path, byte[] content,
String eTag, String versionId) throws IOException {
CompleteMultipartUploadResult uploadResult =
new CompleteMultipartUploadResult();
uploadResult.setVersionId(versionId);
when(s3.getObjectMetadata(any(GetObjectMetadataRequest.class)))
.thenThrow(NOT_FOUND);
InitiateMultipartUploadResult initiateMultipartUploadResult =
new InitiateMultipartUploadResult();
initiateMultipartUploadResult.setUploadId("uploadId");
when(s3.initiateMultipartUpload(
argThat(correctInitiateMultipartUploadRequest(file))))
.thenReturn(initiateMultipartUploadResult);
UploadPartResult uploadPartResult = new UploadPartResult();
uploadPartResult.setETag("partETag");
when(s3.uploadPart(argThat(correctUploadPartRequest(file))))
.thenReturn(uploadPartResult);
CompleteMultipartUploadResult multipartUploadResult =
new CompleteMultipartUploadResult();
multipartUploadResult.setETag(eTag);
multipartUploadResult.setVersionId(versionId);
when(s3.completeMultipartUpload(
argThat(correctMultipartUploadRequest(file))))
.thenReturn(multipartUploadResult);
ListObjectsV2Result emptyListing = new ListObjectsV2Result();
when(s3.listObjectsV2(argThat(correctListObjectsRequest(file + "/"))))
.thenReturn(emptyListing);
FSDataOutputStream outputStream = fs.create(path);
outputStream.write(content);
outputStream.close();
}
private void assertContent(String file, Path path, byte[] content,
String eTag, String versionId) throws IOException {
S3Object s3Object = new S3Object();
ObjectMetadata metadata = new ObjectMetadata();
metadata.setHeader(Headers.S3_VERSION_ID, versionId);
metadata.setHeader(Headers.ETAG, eTag);
s3Object.setObjectMetadata(metadata);
s3Object.setObjectContent(new ByteArrayInputStream(content));
when(s3.getObject(argThat(correctGetObjectRequest(file, eTag, versionId))))
.thenReturn(s3Object);
FSDataInputStream inputStream = fs.open(path);
byte[] readContent = IOUtils.toByteArray(inputStream);
assertArrayEquals(content, readContent);
}
private void assertVersionAttributes(Path path, String eTag, String versionId)
throws IOException {
MetadataStore metadataStore = fs.getMetadataStore();
PathMetadata pathMetadata = metadataStore.get(path);
assertNotNull(pathMetadata);
S3AFileStatus fileStatus = pathMetadata.getFileStatus();
assertEquals(eTag, fileStatus.getETag());
assertEquals(versionId, fileStatus.getVersionId());
}
private Matcher<GetObjectRequest> correctGetObjectRequest(final String key,
final String eTag, final String versionId) {
return new BaseMatcher<GetObjectRequest>() {
@Override
public boolean matches(Object item) {
if (item instanceof GetObjectRequest) {
GetObjectRequest getObjectRequest = (GetObjectRequest) item;
if (getObjectRequest.getKey().equals(key)) {
if (changeDetectionSource.equals(
CHANGE_DETECT_SOURCE_ETAG)) {
return getObjectRequest.getMatchingETagConstraints()
.contains(eTag);
} else if (changeDetectionSource.equals(
CHANGE_DETECT_SOURCE_VERSION_ID)) {
return getObjectRequest.getVersionId().equals(versionId);
}
}
}
return false;
}
@Override
public void describeTo(Description description) {
description.appendText("key and "
+ changeDetectionSource
+ " matches");
}
};
}
private Matcher<UploadPartRequest> correctUploadPartRequest(
final String key) {
return new BaseMatcher<UploadPartRequest>() {
@Override
public boolean matches(Object item) {
if (item instanceof UploadPartRequest) {
UploadPartRequest request = (UploadPartRequest) item;
return request.getKey().equals(key);
}
return false;
}
@Override
public void describeTo(Description description) {
description.appendText("key matches");
}
};
}
private Matcher<InitiateMultipartUploadRequest>
correctInitiateMultipartUploadRequest(final String key) {
return new BaseMatcher<InitiateMultipartUploadRequest>() {
@Override
public void describeTo(Description description) {
description.appendText("key matches");
}
@Override
public boolean matches(Object item) {
if (item instanceof InitiateMultipartUploadRequest) {
InitiateMultipartUploadRequest request =
(InitiateMultipartUploadRequest) item;
return request.getKey().equals(key);
}
return false;
}
};
}
private Matcher<CompleteMultipartUploadRequest>
correctMultipartUploadRequest(final String key) {
return new BaseMatcher<CompleteMultipartUploadRequest>() {
@Override
public boolean matches(Object item) {
if (item instanceof CompleteMultipartUploadRequest) {
CompleteMultipartUploadRequest request =
(CompleteMultipartUploadRequest) item;
return request.getKey().equals(key);
}
return false;
}
@Override
public void describeTo(Description description) {
description.appendText("key matches");
}
};
}
private Matcher<ListObjectsV2Request> correctListObjectsRequest(
final String key) {
return new BaseMatcher<ListObjectsV2Request>() {
@Override
public boolean matches(Object item) {
if (item instanceof ListObjectsV2Request) {
ListObjectsV2Request listObjectsRequest =
(ListObjectsV2Request) item;
return listObjectsRequest.getPrefix().equals(key);
}
return false;
}
@Override
public void describeTo(Description description) {
description.appendText("key matches");
}
};
}
private Matcher<PutObjectRequest> correctPutObjectRequest(
final String key) {
return new BaseMatcher<PutObjectRequest>() {
@Override
public boolean matches(Object item) {
if (item instanceof PutObjectRequest) {
PutObjectRequest putObjectRequest = (PutObjectRequest) item;
return putObjectRequest.getKey().equals(key);
}
return false;
}
@Override
public void describeTo(Description description) {
description.appendText("key matches");
}
};
}
}

View File

@ -20,6 +20,7 @@
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.Collection;
import java.util.concurrent.Callable;
@ -31,16 +32,16 @@
import com.google.common.base.Preconditions;
import org.junit.After;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.test.LambdaTestUtils;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.mockito.Mockito;
import static com.amazonaws.services.dynamodbv2.model.KeyType.HASH;
@ -58,41 +59,75 @@
* Test the PathMetadataDynamoDBTranslation is able to translate between domain
* model objects and DynamoDB items.
*/
@RunWith(Parameterized.class)
public class TestPathMetadataDynamoDBTranslation extends Assert {
private static final Path TEST_DIR_PATH = new Path("s3a://test-bucket/myDir");
private static final Item TEST_DIR_ITEM = new Item();
private static DDBPathMetadata testDirPathMetadata;
private static final long TEST_FILE_LENGTH = 100;
private static final long TEST_MOD_TIME = 9999;
private static final long TEST_BLOCK_SIZE = 128;
private static final String TEST_ETAG = "abc";
private static final String TEST_VERSION_ID = "def";
private static final Path TEST_FILE_PATH = new Path(TEST_DIR_PATH, "myFile");
private static final Item TEST_FILE_ITEM = new Item();
private static DDBPathMetadata testFilePathMetadata;
@BeforeClass
public static void setUpBeforeClass() throws IOException {
private final Item testFileItem;
private final DDBPathMetadata testFilePathMetadata;
private final Item testDirItem;
private final DDBPathMetadata testDirPathMetadata;
@Parameterized.Parameters
public static Collection<Object[]> params() throws IOException {
String username = UserGroupInformation.getCurrentUser().getShortUserName();
return Arrays.asList(new Object[][]{
// with etag and versionId
{
new Item()
.withPrimaryKey(PARENT,
pathToParentKey(TEST_FILE_PATH.getParent()),
CHILD, TEST_FILE_PATH.getName())
.withBoolean(IS_DIR, false)
.withLong(FILE_LENGTH, TEST_FILE_LENGTH)
.withLong(MOD_TIME, TEST_MOD_TIME)
.withLong(BLOCK_SIZE, TEST_BLOCK_SIZE)
.withString(ETAG, TEST_ETAG)
.withString(VERSION_ID, TEST_VERSION_ID),
new DDBPathMetadata(
new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME,
TEST_FILE_PATH, TEST_BLOCK_SIZE, username, TEST_ETAG,
TEST_VERSION_ID))
},
// without etag or versionId
{
new Item()
.withPrimaryKey(PARENT,
pathToParentKey(TEST_FILE_PATH.getParent()),
CHILD, TEST_FILE_PATH.getName())
.withBoolean(IS_DIR, false)
.withLong(FILE_LENGTH, TEST_FILE_LENGTH)
.withLong(MOD_TIME, TEST_MOD_TIME)
.withLong(BLOCK_SIZE, TEST_BLOCK_SIZE),
new DDBPathMetadata(
new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME,
TEST_FILE_PATH, TEST_BLOCK_SIZE, username, null, null))
}
});
}
public TestPathMetadataDynamoDBTranslation(Item item,
DDBPathMetadata metadata) throws IOException {
testFileItem = item;
testFilePathMetadata = metadata;
String username = UserGroupInformation.getCurrentUser().getShortUserName();
testDirPathMetadata = new DDBPathMetadata(new S3AFileStatus(false,
TEST_DIR_PATH, username));
TEST_DIR_ITEM
testDirItem = new Item();
testDirItem
.withPrimaryKey(PARENT, "/test-bucket", CHILD, TEST_DIR_PATH.getName())
.withBoolean(IS_DIR, true);
testFilePathMetadata = new DDBPathMetadata(
new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME, TEST_FILE_PATH,
TEST_BLOCK_SIZE, username));
TEST_FILE_ITEM
.withPrimaryKey(PARENT, pathToParentKey(TEST_FILE_PATH.getParent()),
CHILD, TEST_FILE_PATH.getName())
.withBoolean(IS_DIR, false)
.withLong(FILE_LENGTH, TEST_FILE_LENGTH)
.withLong(MOD_TIME, TEST_MOD_TIME)
.withLong(BLOCK_SIZE, TEST_BLOCK_SIZE);
}
/**
@ -138,8 +173,8 @@ public void testItemToPathMetadata() throws IOException {
UserGroupInformation.getCurrentUser().getShortUserName();
assertNull(itemToPathMetadata(null, user));
verify(TEST_DIR_ITEM, itemToPathMetadata(TEST_DIR_ITEM, user));
verify(TEST_FILE_ITEM, itemToPathMetadata(TEST_FILE_ITEM, user));
verify(testDirItem, itemToPathMetadata(testDirItem, user));
verify(testFileItem, itemToPathMetadata(testFileItem, user));
}
/**
@ -147,7 +182,7 @@ public void testItemToPathMetadata() throws IOException {
*/
private static void verify(Item item, PathMetadata meta) {
assertNotNull(meta);
final FileStatus status = meta.getFileStatus();
final S3AFileStatus status = meta.getFileStatus();
final Path path = status.getPath();
assertEquals(item.get(PARENT), pathToParentKey(path.getParent()));
assertEquals(item.get(CHILD), path.getName());
@ -157,6 +192,10 @@ private static void verify(Item item, PathMetadata meta) {
assertEquals(len, status.getLen());
long bSize = item.hasAttribute(BLOCK_SIZE) ? item.getLong(BLOCK_SIZE) : 0;
assertEquals(bSize, status.getBlockSize());
String eTag = item.getString(ETAG);
assertEquals(eTag, status.getETag());
String versionId = item.getString(VERSION_ID);
assertEquals(versionId, status.getVersionId());
/*
* S3AFileStatue#getModificationTime() reports the current time, so the
@ -252,7 +291,7 @@ public void testVersionMarkerNotStatusIllegalPath() throws Throwable {
@Test
public void testIsAuthoritativeCompatibilityItemToPathMetadata()
throws Exception {
Item item = Mockito.spy(TEST_DIR_ITEM);
Item item = Mockito.spy(testDirItem);
item.withBoolean(IS_AUTHORITATIVE, true);
PathMetadataDynamoDBTranslation.IGNORED_FIELDS.add(IS_AUTHORITATIVE);
@ -288,7 +327,7 @@ public void testIsAuthoritativeCompatibilityPathMetadataToItem() {
@Test
public void testIsLastUpdatedCompatibilityItemToPathMetadata()
throws Exception {
Item item = Mockito.spy(TEST_DIR_ITEM);
Item item = Mockito.spy(testDirItem);
item.withLong(LAST_UPDATED, 100);
PathMetadataDynamoDBTranslation.IGNORED_FIELDS.add(LAST_UPDATED);

View File

@ -26,6 +26,8 @@
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.Tristate;
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_METADATASTORE_AUTHORITATIVE_DIR_TTL;
@ -51,7 +53,7 @@ public void testDirListingUnion() throws Exception {
Arrays.asList(m1, m2), false);
// Two other files in s3
List<FileStatus> s3Listing = Arrays.asList(
List<S3AFileStatus> s3Listing = Arrays.asList(
makeFileStatus("s3a://bucket/dir/s3-file3", false),
makeFileStatus("s3a://bucket/dir/s3-file4", false)
);
@ -86,12 +88,15 @@ private PathMetadata makePathMeta(String pathStr, boolean isDir) {
return new PathMetadata(makeFileStatus(pathStr, isDir));
}
private FileStatus makeFileStatus(String pathStr, boolean isDir) {
private S3AFileStatus makeFileStatus(String pathStr, boolean isDir) {
Path p = new Path(pathStr);
S3AFileStatus fileStatus;
if (isDir) {
return new FileStatus(0, true, 1, 1, System.currentTimeMillis(), p);
fileStatus = new S3AFileStatus(Tristate.UNKNOWN, p, null);
} else {
return new FileStatus(100, false, 1, 1, System.currentTimeMillis(), p);
}
fileStatus = new S3AFileStatus(
100, System.currentTimeMillis(), p, 1, null, null, null);
}
return fileStatus;
}
}

View File

@ -170,7 +170,8 @@ protected S3AFileStatus copyStatus(S3AFileStatus status) {
status.getOwner());
} else {
return new S3AFileStatus(status.getLen(), status.getModificationTime(),
status.getPath(), status.getBlockSize(), status.getOwner());
status.getPath(), status.getBlockSize(), status.getOwner(),
status.getETag(), status.getVersionId());
}
}
@ -207,7 +208,8 @@ private static void printTiming(Logger log, String op, NanoTimer timer,
}
protected static S3AFileStatus makeFileStatus(Path path) throws IOException {
return new S3AFileStatus(SIZE, ACCESS_TIME, path, BLOCK_SIZE, OWNER);
return new S3AFileStatus(SIZE, ACCESS_TIME, path, BLOCK_SIZE, OWNER,
null, null);
}
protected static S3AFileStatus makeDirStatus(Path p) throws IOException {

View File

@ -25,6 +25,7 @@
import java.nio.charset.Charset;
import java.util.List;
import org.junit.Assume;
import org.junit.Test;
import org.apache.commons.io.IOUtils;
@ -34,6 +35,8 @@
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.apache.hadoop.fs.s3a.Statistic;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source;
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.OperationDuration;
@ -80,6 +83,11 @@ public void setup() throws Exception {
selectConf = new Configuration(getConfiguration());
localFile = getTempFilename();
landsatSrc = getLandsatGZ().toString();
ChangeDetectionPolicy changeDetectionPolicy =
getLandsatFS().getChangeDetectionPolicy();
Assume.assumeFalse("the standard landsat bucket doesn't have versioning",
changeDetectionPolicy.getSource() == Source.VersionId
&& changeDetectionPolicy.isRequireVersion());
}
@Override

View File

@ -21,6 +21,7 @@
import java.io.IOException;
import java.util.List;
import org.junit.Assume;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -32,6 +33,8 @@
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3AInstrumentation;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
@ -190,6 +193,11 @@ public void setup() throws Exception {
// disable the gzip codec, so that the record readers do not
// get confused
enablePassthroughCodec(selectConf, ".gz");
ChangeDetectionPolicy changeDetectionPolicy =
getLandsatFS().getChangeDetectionPolicy();
Assume.assumeFalse("the standard landsat bucket doesn't have versioning",
changeDetectionPolicy.getSource() == Source.VersionId
&& changeDetectionPolicy.isRequireVersion());
}
protected int getMaxLines() {

View File

@ -21,6 +21,9 @@
import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy;
import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy.Source;
import org.junit.Assume;
import org.junit.Test;
import org.apache.hadoop.conf.Configuration;
@ -90,6 +93,13 @@ public class ITestS3SelectMRJob extends AbstractS3SelectTest {
public void setup() throws Exception {
super.setup();
fs = S3ATestUtils.createTestFileSystem(conf);
ChangeDetectionPolicy changeDetectionPolicy =
getLandsatFS().getChangeDetectionPolicy();
Assume.assumeFalse("the standard landsat bucket doesn't have versioning",
changeDetectionPolicy.getSource() == Source.VersionId
&& changeDetectionPolicy.isRequireVersion());
rootPath = path("ITestS3SelectMRJob");
Path workingDir = path("working");
fs.setWorkingDirectory(workingDir);

View File

@ -48,7 +48,7 @@ public void testS3AFilesArePrivate() throws Throwable {
@Test
public void testS3AFilesArePrivateOtherContstructor() throws Throwable {
S3AFileStatus status = new S3AFileStatus(0, 0, PATH, 1, "self");
S3AFileStatus status = new S3AFileStatus(0, 0, PATH, 1, "self", null, null);
assertTrue("Not encrypted: " + status, status.isEncrypted());
assertNotExecutable(status);
}