diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirMarkerTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirMarkerTracker.java index ca04fed65a..9390c69933 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirMarkerTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirMarkerTracker.java @@ -34,20 +34,20 @@ * Tracks directory markers which have been reported in object listings. * This is needed for auditing and cleanup, including during rename * operations. - *
+ ** Designed to be used while scanning through the results of listObject * calls, where are we assume the results come in alphanumeric sort order * and parent entries before children. - *
+ ** This lets as assume that we can identify all leaf markers as those * markers which were added to set of leaf markers and not subsequently * removed as a child entries were discovered. - *
+ ** To avoid scanning datastructures excessively, the path of the parent * directory of the last file added is cached. This allows for a * quick bailout when many children of the same directory are * returned in a listing. - *
+ ** Consult the directory_markers document for details on this feature, * including terminology. */ @@ -106,7 +106,7 @@ public class DirMarkerTracker { /** * Construct. - *
+ ** The base path is currently only used for information rather than * validating paths supplied in other methods. * @param basePath base path of track @@ -128,7 +128,7 @@ public Path getBasePath() { /** * A marker has been found; this may or may not be a leaf. - *
+ *
* Trigger a move of all markers above it into the surplus map.
* @param path marker path
* @param key object key
@@ -160,7 +160,7 @@ public List
* Declare all markers above it as surplus
* @param path marker path
* @param key object key
@@ -187,7 +187,7 @@ private List
* if {@link #recordSurplusMarkers} is true, the marker is
* moved to the surplus map. Not doing this is simply an
* optimisation designed to reduce risk of excess memory consumption
@@ -223,7 +223,7 @@ public Map
* Empty if they were not being recorded.
* @return all surplus markers.
*/
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirectoryPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirectoryPolicy.java
index 36dd2e4fd2..6ba74c7e97 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirectoryPolicy.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirectoryPolicy.java
@@ -69,21 +69,21 @@ enum MarkerPolicy {
/**
* Delete markers.
- *
* This is the classic S3A policy,
*/
Delete(DIRECTORY_MARKER_POLICY_DELETE),
/**
* Keep markers.
- *
* This is Not backwards compatible.
*/
Keep(DIRECTORY_MARKER_POLICY_KEEP),
/**
* Keep markers in authoritative paths only.
- *
* This is Not backwards compatible within the
* auth paths, but is outside these.
*/
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java
index ecfe2c0ba0..5d17ae91b8 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java
@@ -119,6 +119,7 @@ RemoteIterator
* The parallel execution is in groups of size
* {@link InternalConstants#RENAME_PARALLEL_LIMIT}; it is only
* after one group completes that the next group is initiated.
- *
* Once enough files have been copied that they meet the
* {@link InternalConstants#MAX_ENTRIES_TO_DELETE} threshold, a delete
* is initiated.
* If it succeeds, the rename continues with the next group of files.
- *
* Directory Markers which have child entries are never copied; only those
* which represent empty directories are copied in the rename.
* The {@link DirMarkerTracker} tracks which markers must be copied, and
@@ -71,10 +71,10 @@
* the copied tree. This is to ensure that even if a directory tree
* is copied from an authoritative path to a non-authoritative one
* there is never any contamination of the non-auth path with markers.
- *
* The rename operation implements the classic HDFS rename policy of
* rename(file, dir) renames the file under the directory.
- *
*
* There is no validation of input and output paths.
* Callers are required to themselves verify that destination is not under
@@ -178,7 +178,7 @@ private void completeActiveCopies(String reason) throws IOException {
/**
* Queue an object for deletion.
- *
* This object will be deleted when the next page of objects to delete
* is posted to S3. Therefore, the COPY must have finished
* before that deletion operation takes place.
@@ -204,9 +204,9 @@ private void queueToDelete(Path path, String key) {
/**
* Queue a list of markers for deletion.
- *
* no-op if the list is empty.
- *
* See {@link #queueToDelete(Path, String)} for
* details on safe use of this method.
*
@@ -221,7 +221,7 @@ private void queueToDelete(
/**
* Queue a single marker for deletion.
- *
* See {@link #queueToDelete(Path, String)} for
* details on safe use of this method.
*
@@ -427,7 +427,7 @@ protected void recursiveDirectoryRename() throws IOException {
/**
* Operations to perform at the end of every loop iteration.
- *
* This may block the thread waiting for copies to complete
* and/or delete a page of data.
*/
@@ -448,11 +448,11 @@ private void endOfLoopActions() throws IOException {
/**
* Process all directory markers at the end of the rename.
* All leaf markers are queued to be copied in the store;
- *
* Why not simply create new markers? All the metadata
* gets copied too, so if there was anything relevant then
* it would be preserved.
- *
* At the same time: markers aren't valued much and may
* be deleted without any safety checks -so if there was relevant
* data it is at risk of destruction at any point.
@@ -461,7 +461,7 @@ private void endOfLoopActions() throws IOException {
* Be advised though: the costs of the copy not withstanding,
* it is a lot easier to have one single type of scheduled copy operation
* than have copy and touch calls being scheduled.
- *
* The duration returned is the time to initiate all copy/delete operations,
* including any blocking waits for active copies and paged deletes
* to execute. There may still be outstanding operations
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
index 1d52b0a34e..3c16d87fe1 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
@@ -82,7 +82,7 @@
/**
* CLI to manage S3Guard Metadata Store.
- *
* Some management tools invoke this class directly.
*/
@InterfaceAudience.LimitedPrivate("management tools")
@@ -526,7 +526,6 @@ public int run(String[] args, PrintStream out)
* Validate the marker options.
* @param out output stream
* @param fs filesystem
- * @param path test path
* @param marker desired marker option -may be null.
*/
private void processMarkerOption(final PrintStream out,
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java
index 230f07793d..4ddc5f9478 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java
@@ -147,7 +147,7 @@ public final class MarkerTool extends S3GuardTool {
/**
* Constant to use when there is no limit on the number of
* objects listed: {@value}.
- *
* The value is 0 and not -1 because it allows for the limit to be
* set on the command line {@code -limit 0}.
* The command line parser rejects {@code -limit -1} as the -1
@@ -475,17 +475,23 @@ public String toString() {
'}';
}
- /** Exit code to report. */
+ /**
+ * @return the exit code to report.
+ */
public int getExitCode() {
return exitCode;
}
- /** Tracker which did the scan. */
+ /**
+ * @return the tracker which did the scan.
+ */
public DirMarkerTracker getTracker() {
return tracker;
}
- /** Summary of purge. Null if none took place. */
+ /**
+ * @return the summary of the purge; null if none took place.
+ */
public MarkerPurgeSummary getPurgeSummary() {
return purgeSummary;
}
@@ -661,7 +667,7 @@ private String suffix(final int size) {
* @param path path to scan
* @param tracker tracker to update
* @param limit limit of files to scan; -1 for 'unlimited'
- * @return true if the scan completedly scanned the entire tree
+ * @return true if the scan completely scanned the entire tree
* @throws IOException IO failure
*/
@Retries.RetryTranslated
@@ -840,6 +846,7 @@ public void setVerbose(final boolean verbose) {
* Execute the marker tool, with no checks on return codes.
*
* @param scanArgs set of args for the scanner.
+ * @throws IOException IO failure
* @return the result
*/
@SuppressWarnings("IOResourceOpenedButNotSafelyClosed")
@@ -853,9 +860,9 @@ public static MarkerTool.ScanResult execMarkerTool(
/**
* Arguments for the scan.
- *
* Uses a builder/argument object because too many arguments were
- * being created and it was making maintenance harder.
+ * being created, and it was making maintenance harder.
*/
public static final class ScanArgs {
@@ -960,43 +967,71 @@ public static final class ScanArgsBuilder {
/** Consider only markers in nonauth paths as errors. */
private boolean nonAuth = false;
- /** Source FS; must be or wrap an S3A FS. */
+ /**
+ * Source FS; must be or wrap an S3A FS.
+ * @param source Source FileSystem
+ * @return the builder class for method chaining
+ */
public ScanArgsBuilder withSourceFS(final FileSystem source) {
this.sourceFS = source;
return this;
}
- /** Path to scan. */
+ /**
+ * Path to scan.
+ * @param p path to scan
+ * @return builder class for method chaining
+ */
public ScanArgsBuilder withPath(final Path p) {
this.path = p;
return this;
}
- /** Purge? */
+ /**
+ * Should the markers be purged? This is also enabled when using the clean flag on the CLI.
+ * @param d set to purge if true
+ * @return builder class for method chaining
+ */
public ScanArgsBuilder withDoPurge(final boolean d) {
this.doPurge = d;
return this;
}
- /** Min marker count (ignored on purge). */
+ /**
+ * Min marker count an audit must find (ignored on purge).
+ * @param min minimum marker count an audit must find (default 0)
+ * @return builder class for method chaining
+ */
public ScanArgsBuilder withMinMarkerCount(final int min) {
this.minMarkerCount = min;
return this;
}
- /** Max marker count (ignored on purge). */
+ /**
+ * Max marker count an audit must find (ignored on purge).
+ * @param max maximum marker count an audit must find (default 0)
+ * @return builder class for method chaining
+ */
public ScanArgsBuilder withMaxMarkerCount(final int max) {
this.maxMarkerCount = max;
return this;
}
- /** Limit of files to scan; 0 for 'unlimited'. */
+ /**
+ * Limit of files to scan; 0 for 'unlimited'.
+ * @param l Limit of files to scan
+ * @return builder class for method chaining
+ */
public ScanArgsBuilder withLimit(final int l) {
this.limit = l;
return this;
}
- /** Consider only markers in nonauth paths as errors. */
+ /**
+ * Consider only markers in non-authoritative paths as errors.
+ * @param b true if the tool should only consider markers in non-authoritative paths
+ * @return builder class for method chaining
+ */
public ScanArgsBuilder withNonAuth(final boolean b) {
this.nonAuth = b;
return this;