HADOOP-17293. S3A to always probe S3 in S3A getFileStatus on non-auth paths
This reverts changes in HADOOP-13230 to use S3Guard TTL in choosing when to issue a HEAD request; fixing tests to compensate. New org.apache.hadoop.fs.s3a.performance.OperationCost cost, S3GUARD_NONAUTH_FILE_STATUS_PROBE for use in cost tests. Contributed by Steve Loughran. Change-Id: I418d55d2d2562a48b2a14ec7dee369db49b4e29e
This commit is contained in:
parent
44ff4c1058
commit
963793dd48
@ -2980,55 +2980,31 @@ S3AFileStatus innerGetFileStatus(final Path f,
|
|||||||
// a file has been found in a non-auth path and the caller has not said
|
// a file has been found in a non-auth path and the caller has not said
|
||||||
// they only care about directories
|
// they only care about directories
|
||||||
LOG.debug("Metadata for {} found in the non-auth metastore.", path);
|
LOG.debug("Metadata for {} found in the non-auth metastore.", path);
|
||||||
// If the timestamp of the pm is close to "now", we don't need to
|
final long msModTime = pm.getFileStatus().getModificationTime();
|
||||||
// bother with a check of S3. that means:
|
|
||||||
// one of : status modtime is close to now,
|
|
||||||
// or pm.getLastUpdated() == now
|
|
||||||
|
|
||||||
// get the time in which a status modtime is considered valid
|
S3AFileStatus s3AFileStatus;
|
||||||
// in a non-auth metastore
|
try {
|
||||||
long validTime =
|
s3AFileStatus = s3GetFileStatus(path,
|
||||||
ttlTimeProvider.getNow() - ttlTimeProvider.getMetadataTtl();
|
key,
|
||||||
final long msModTime = msStatus.getModificationTime();
|
probes,
|
||||||
|
tombstones,
|
||||||
|
needEmptyDirectoryFlag);
|
||||||
|
} catch (FileNotFoundException fne) {
|
||||||
|
LOG.trace("File Not Found from probes for {}", key, fne);
|
||||||
|
s3AFileStatus = null;
|
||||||
|
}
|
||||||
|
if (s3AFileStatus == null) {
|
||||||
|
LOG.warn("Failed to find file {}. Either it is not yet visible, or "
|
||||||
|
+ "it has been deleted.", path);
|
||||||
|
} else {
|
||||||
|
final long s3ModTime = s3AFileStatus.getModificationTime();
|
||||||
|
|
||||||
if (msModTime < validTime) {
|
if(s3ModTime > msModTime) {
|
||||||
LOG.debug("Metastore entry of {} is out of date, probing S3", path);
|
LOG.debug("S3Guard metadata for {} is outdated;"
|
||||||
try {
|
+ " s3modtime={}; msModTime={} updating metastore",
|
||||||
S3AFileStatus s3AFileStatus = s3GetFileStatus(path,
|
path, s3ModTime, msModTime);
|
||||||
key,
|
return S3Guard.putAndReturn(metadataStore, s3AFileStatus,
|
||||||
probes,
|
ttlTimeProvider);
|
||||||
tombstones,
|
|
||||||
needEmptyDirectoryFlag);
|
|
||||||
// if the new status is more current than that in the metastore,
|
|
||||||
// it means S3 has changed and the store needs updating
|
|
||||||
final long s3ModTime = s3AFileStatus.getModificationTime();
|
|
||||||
|
|
||||||
if (s3ModTime > msModTime) {
|
|
||||||
// there's new data in S3
|
|
||||||
LOG.debug("S3Guard metadata for {} is outdated;"
|
|
||||||
+ " s3modtime={}; msModTime={} updating metastore",
|
|
||||||
path, s3ModTime, msModTime);
|
|
||||||
// add to S3Guard
|
|
||||||
S3Guard.putAndReturn(metadataStore, s3AFileStatus,
|
|
||||||
ttlTimeProvider);
|
|
||||||
} else {
|
|
||||||
// the modtime of the data is the same as/older than the s3guard
|
|
||||||
// value either an old object has been found, or the existing one
|
|
||||||
// was retrieved in both cases -refresh the S3Guard entry so the
|
|
||||||
// record's TTL is updated.
|
|
||||||
S3Guard.refreshEntry(metadataStore, pm, s3AFileStatus,
|
|
||||||
ttlTimeProvider);
|
|
||||||
}
|
|
||||||
// return the value
|
|
||||||
// note that the checks for empty dir status below can be skipped
|
|
||||||
// because the call to s3GetFileStatus include the checks there
|
|
||||||
return s3AFileStatus;
|
|
||||||
} catch (FileNotFoundException fne) {
|
|
||||||
// the attempt to refresh the record failed because there was
|
|
||||||
// no entry. Either it is a new file not visible, or it
|
|
||||||
// has been deleted (and therefore S3Guard is out of sync with S3)
|
|
||||||
LOG.warn("Failed to find file {}. Either it is not yet visible, or "
|
|
||||||
+ "it has been deleted.", path);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -314,7 +314,7 @@ public void testListStatusEncryptedFile() throws Exception {
|
|||||||
fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
|
fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
|
||||||
|
|
||||||
//Until this point, no exception is thrown about access
|
//Until this point, no exception is thrown about access
|
||||||
if (!fsKeyB.hasMetadataStore()) {
|
if (statusProbesCheckS3(fsKeyB, fileToStat)) {
|
||||||
intercept(AccessDeniedException.class,
|
intercept(AccessDeniedException.class,
|
||||||
SERVICE_AMAZON_S3_STATUS_CODE_403,
|
SERVICE_AMAZON_S3_STATUS_CODE_403,
|
||||||
() -> fsKeyB.listStatus(fileToStat));
|
() -> fsKeyB.listStatus(fileToStat));
|
||||||
@ -323,6 +323,16 @@ public void testListStatusEncryptedFile() throws Exception {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Do file status probes check S3?
|
||||||
|
* @param fs filesystem
|
||||||
|
* @param path file path
|
||||||
|
* @return true if check for a path being a file will issue a HEAD request.
|
||||||
|
*/
|
||||||
|
private boolean statusProbesCheckS3(S3AFileSystem fs, Path path) {
|
||||||
|
return !fs.hasMetadataStore() || !fs.allowAuthoritative(path);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* It is possible to delete directories without the proper encryption key and
|
* It is possible to delete directories without the proper encryption key and
|
||||||
* the hierarchy above it.
|
* the hierarchy above it.
|
||||||
@ -340,7 +350,7 @@ public void testDeleteEncryptedObjectWithDifferentKey() throws Exception {
|
|||||||
Path fileToDelete = new Path(pathABC, "filetobedeleted.txt");
|
Path fileToDelete = new Path(pathABC, "filetobedeleted.txt");
|
||||||
writeThenReadFile(fileToDelete, TEST_FILE_LEN);
|
writeThenReadFile(fileToDelete, TEST_FILE_LEN);
|
||||||
fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
|
fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
|
||||||
if (!fsKeyB.hasMetadataStore()) {
|
if (statusProbesCheckS3(fsKeyB, fileToDelete)) {
|
||||||
intercept(AccessDeniedException.class,
|
intercept(AccessDeniedException.class,
|
||||||
SERVICE_AMAZON_S3_STATUS_CODE_403,
|
SERVICE_AMAZON_S3_STATUS_CODE_403,
|
||||||
() -> fsKeyB.delete(fileToDelete, false));
|
() -> fsKeyB.delete(fileToDelete, false));
|
||||||
|
@ -92,7 +92,8 @@ public void testCostOfLocatedFileStatusOnFile() throws Throwable {
|
|||||||
whenRaw(FILE_STATUS_FILE_PROBE
|
whenRaw(FILE_STATUS_FILE_PROBE
|
||||||
.plus(LIST_LOCATED_STATUS_LIST_OP)),
|
.plus(LIST_LOCATED_STATUS_LIST_OP)),
|
||||||
whenAuthoritative(LIST_LOCATED_STATUS_LIST_OP),
|
whenAuthoritative(LIST_LOCATED_STATUS_LIST_OP),
|
||||||
whenNonauth(LIST_LOCATED_STATUS_LIST_OP));
|
whenNonauth(LIST_LOCATED_STATUS_LIST_OP
|
||||||
|
.plus(S3GUARD_NONAUTH_FILE_STATUS_PROBE)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -187,7 +188,8 @@ public void testCostOfListStatusOnFile() throws Throwable {
|
|||||||
whenRaw(LIST_STATUS_LIST_OP
|
whenRaw(LIST_STATUS_LIST_OP
|
||||||
.plus(GET_FILE_STATUS_ON_FILE)),
|
.plus(GET_FILE_STATUS_ON_FILE)),
|
||||||
whenAuthoritative(LIST_STATUS_LIST_OP),
|
whenAuthoritative(LIST_STATUS_LIST_OP),
|
||||||
whenNonauth(LIST_STATUS_LIST_OP));
|
whenNonauth(LIST_STATUS_LIST_OP
|
||||||
|
.plus(S3GUARD_NONAUTH_FILE_STATUS_PROBE)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -326,7 +326,7 @@ protected Path path() throws IOException {
|
|||||||
* @return a number >= 0.
|
* @return a number >= 0.
|
||||||
*/
|
*/
|
||||||
private int getFileStatusHeadCount() {
|
private int getFileStatusHeadCount() {
|
||||||
return authMode ? 0 : 0;
|
return authMode ? 0 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -422,7 +422,7 @@ public void checkBasicFileOperations() throws Throwable {
|
|||||||
|
|
||||||
readonlyFS.getFileStatus(emptyDir);
|
readonlyFS.getFileStatus(emptyDir);
|
||||||
// now look at a file; the outcome depends on the mode.
|
// now look at a file; the outcome depends on the mode.
|
||||||
accessDeniedIf(!s3guard, () ->
|
accessDeniedIf(!guardedInAuthMode, () ->
|
||||||
readonlyFS.getFileStatus(subdirFile));
|
readonlyFS.getFileStatus(subdirFile));
|
||||||
|
|
||||||
// irrespective of mode, the attempt to read the data will fail.
|
// irrespective of mode, the attempt to read the data will fail.
|
||||||
@ -437,7 +437,7 @@ public void checkBasicFileOperations() throws Throwable {
|
|||||||
// This means that permissions on the file do not get checked.
|
// This means that permissions on the file do not get checked.
|
||||||
// See: HADOOP-16464.
|
// See: HADOOP-16464.
|
||||||
Optional<FSDataInputStream> optIn = accessDeniedIf(
|
Optional<FSDataInputStream> optIn = accessDeniedIf(
|
||||||
!s3guard, () -> readonlyFS.open(emptyFile));
|
!guardedInAuthMode, () -> readonlyFS.open(emptyFile));
|
||||||
if (optIn.isPresent()) {
|
if (optIn.isPresent()) {
|
||||||
try (FSDataInputStream is = optIn.get()) {
|
try (FSDataInputStream is = optIn.get()) {
|
||||||
Assertions.assertThat(is.read())
|
Assertions.assertThat(is.read())
|
||||||
@ -455,8 +455,8 @@ public void checkGlobOperations() throws Throwable {
|
|||||||
describe("Glob Status operations");
|
describe("Glob Status operations");
|
||||||
// baseline: the real filesystem on a subdir
|
// baseline: the real filesystem on a subdir
|
||||||
globFS(getFileSystem(), subdirFile, null, false, 1);
|
globFS(getFileSystem(), subdirFile, null, false, 1);
|
||||||
// a file fails if not guarded
|
// a file fails if not in auth mode
|
||||||
globFS(readonlyFS, subdirFile, null, !s3guard, 1);
|
globFS(readonlyFS, subdirFile, null, !guardedInAuthMode, 1);
|
||||||
// empty directories don't fail.
|
// empty directories don't fail.
|
||||||
FileStatus[] st = globFS(readonlyFS, emptyDir, null, false, 1);
|
FileStatus[] st = globFS(readonlyFS, emptyDir, null, false, 1);
|
||||||
if (s3guard) {
|
if (s3guard) {
|
||||||
@ -554,7 +554,7 @@ public void checkLocatedFileStatusScanFile() throws Throwable {
|
|||||||
true,
|
true,
|
||||||
TEXT_FILE,
|
TEXT_FILE,
|
||||||
true);
|
true);
|
||||||
accessDeniedIf(!s3guard,
|
accessDeniedIf(!guardedInAuthMode,
|
||||||
() -> fetcher.getFileStatuses())
|
() -> fetcher.getFileStatuses())
|
||||||
.ifPresent(stats -> {
|
.ifPresent(stats -> {
|
||||||
Assertions.assertThat(stats)
|
Assertions.assertThat(stats)
|
||||||
@ -619,7 +619,7 @@ public void checkLocatedFileStatusNonexistentPath() throws Throwable {
|
|||||||
public void checkDeleteOperations() throws Throwable {
|
public void checkDeleteOperations() throws Throwable {
|
||||||
describe("Testing delete operations");
|
describe("Testing delete operations");
|
||||||
readonlyFS.delete(emptyDir, true);
|
readonlyFS.delete(emptyDir, true);
|
||||||
if (!s3guard) {
|
if (!authMode) {
|
||||||
// to fail on HEAD
|
// to fail on HEAD
|
||||||
accessDenied(() -> readonlyFS.delete(emptyFile, true));
|
accessDenied(() -> readonlyFS.delete(emptyFile, true));
|
||||||
} else {
|
} else {
|
||||||
|
@ -154,6 +154,13 @@ public final class OperationCost {
|
|||||||
public static final OperationCost CREATE_FILE_NO_OVERWRITE =
|
public static final OperationCost CREATE_FILE_NO_OVERWRITE =
|
||||||
FILE_STATUS_ALL_PROBES;
|
FILE_STATUS_ALL_PROBES;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* S3Guard in non-auth mode always attempts a single file
|
||||||
|
* status call.
|
||||||
|
*/
|
||||||
|
public static final OperationCost S3GUARD_NONAUTH_FILE_STATUS_PROBE =
|
||||||
|
FILE_STATUS_FILE_PROBE;
|
||||||
|
|
||||||
/** Expected HEAD count. */
|
/** Expected HEAD count. */
|
||||||
private final int head;
|
private final int head;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user