diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirMarkerTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirMarkerTracker.java
index ca04fed65a..9390c69933 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirMarkerTracker.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirMarkerTracker.java
@@ -34,20 +34,20 @@
  * Tracks directory markers which have been reported in object listings.
  * This is needed for auditing and cleanup, including during rename
  * operations.
- * <p></p>
+ * <p>
  * Designed to be used while scanning through the results of listObject
  * calls, where are we assume the results come in alphanumeric sort order
  * and parent entries before children.
- * <p></p>
+ * <p>
  * This lets as assume that we can identify all leaf markers as those
  * markers which were added to set of leaf markers and not subsequently
  * removed as a child entries were discovered.
- * <p></p>
+ * <p>
  * To avoid scanning datastructures excessively, the path of the parent
  * directory of the last file added is cached. This allows for a
  * quick bailout when many children of the same directory are
  * returned in a listing.
- * <p></p>
+ * <p>
  * Consult the directory_markers document for details on this feature,
  * including terminology.
  */
@@ -106,7 +106,7 @@ public class DirMarkerTracker {
 
   /**
    * Construct.
-   * <p></p>
+   * <p>
    * The base path is currently only used for information rather than
    * validating paths supplied in other methods.
    * @param basePath base path of track
@@ -128,7 +128,7 @@ public Path getBasePath() {
 
   /**
    * A marker has been found; this may or may not be a leaf.
-   * <p></p>
+   * <p>
    * Trigger a move of all markers above it into the surplus map.
    * @param path marker path
    * @param key object key
@@ -160,7 +160,7 @@ public List<Marker> fileFound(Path path,
 
   /**
    * A path has been found.
-   * <p></p>
+   * <p>
    * Declare all markers above it as surplus
    * @param path marker path
    * @param key object key
@@ -187,7 +187,7 @@ private List<Marker> pathFound(Path path,
   /**
    * Remove all markers from the path and its parents from the
    * {@link #leafMarkers} map.
-   * <p></p>
+   * <p>
    * if {@link #recordSurplusMarkers} is true, the marker is
    * moved to the surplus map. Not doing this is simply an
    * optimisation designed to reduce risk of excess memory consumption
@@ -223,7 +223,7 @@ public Map<Path, Marker> getLeafMarkers() {
 
   /**
    * Get the map of surplus markers.
-   * <p></p>
+   * <p>
    * Empty if they were not being recorded.
    * @return all surplus markers.
    */
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirectoryPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirectoryPolicy.java
index 36dd2e4fd2..6ba74c7e97 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirectoryPolicy.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirectoryPolicy.java
@@ -69,21 +69,21 @@ enum MarkerPolicy {
 
     /**
      * Delete markers.
-     * <p></p>
+     * <p>
      * This is the classic S3A policy,
      */
     Delete(DIRECTORY_MARKER_POLICY_DELETE),
 
     /**
      * Keep markers.
-     * <p></p>
+     * <p>
      * This is Not backwards compatible.
      */
     Keep(DIRECTORY_MARKER_POLICY_KEEP),
 
     /**
      * Keep markers in authoritative paths only.
-     * <p></p>
+     * <p>
      * This is Not backwards compatible within the
      * auth paths, but is outside these.
      */
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java
index ecfe2c0ba0..5d17ae91b8 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java
@@ -119,6 +119,7 @@ RemoteIterator<S3ALocatedFileStatus> listFilesAndDirectoryMarkers(
    * There's no update of metadata, directory markers, etc.
    * Callers must implement.
    * @param srcKey source object path
+   * @param destKey destination object path
    * @param srcAttributes S3 attributes of the source object
    * @param readContext the read context
    * @return the result of the copy
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java
index bc9ad669b5..ae4d2fe7a3 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RenameOperation.java
@@ -53,16 +53,16 @@
 
 /**
  * A parallelized rename operation.
- * <p></p>
+ * <p>
  * The parallel execution is in groups of size
  * {@link InternalConstants#RENAME_PARALLEL_LIMIT}; it is only
  * after one group completes that the next group is initiated.
- * <p></p>
+ * <p>
  * Once enough files have been copied that they meet the
  * {@link InternalConstants#MAX_ENTRIES_TO_DELETE} threshold, a delete
  * is initiated.
  * If it succeeds, the rename continues with the next group of files.
- * <p></p>
+ * <p>
  * Directory Markers which have child entries are never copied; only those
  * which represent empty directories are copied in the rename.
  * The {@link DirMarkerTracker} tracks which markers must be copied, and
@@ -71,10 +71,10 @@
  * the copied tree. This is to ensure that even if a directory tree
  * is copied from an authoritative path to a non-authoritative one
  * there is never any contamination of the non-auth path with markers.
- * <p></p>
+ * <p>
  * The rename operation implements the classic HDFS rename policy of
  * rename(file, dir) renames the file under the directory.
- * <p></p>
+ * <p>
  *
  * There is no validation of input and output paths.
  * Callers are required to themselves verify that destination is not under
@@ -178,7 +178,7 @@ private void completeActiveCopies(String reason) throws IOException {
 
   /**
    * Queue an object for deletion.
-   * <p></p>
+   * <p>
    * This object will be deleted when the next page of objects to delete
    * is posted to S3. Therefore, the COPY must have finished
    * before that deletion operation takes place.
@@ -204,9 +204,9 @@ private void queueToDelete(Path path, String key) {
 
   /**
    * Queue a list of markers for deletion.
-   * <p></p>
+   * <p>
    * no-op if the list is empty.
-   * <p></p>
+   * <p>
    * See {@link #queueToDelete(Path, String)} for
    * details on safe use of this method.
    *
@@ -221,7 +221,7 @@ private void queueToDelete(
 
   /**
    * Queue a single marker for deletion.
-   * <p></p>
+   * <p>
    * See {@link #queueToDelete(Path, String)} for
    * details on safe use of this method.
    *
@@ -427,7 +427,7 @@ protected void recursiveDirectoryRename() throws IOException {
 
   /**
    * Operations to perform at the end of every loop iteration.
-   * <p></p>
+   * <p>
    * This may block the thread waiting for copies to complete
    * and/or delete a page of data.
    */
@@ -448,11 +448,11 @@ private void endOfLoopActions() throws IOException {
   /**
    * Process all directory markers at the end of the rename.
    * All leaf markers are queued to be copied in the store;
-   * <p></p>
+   * <p>
    * Why not simply create new markers? All the metadata
    * gets copied too, so if there was anything relevant then
    * it would be preserved.
-   * <p></p>
+   * <p>
    * At the same time: markers aren't valued much and may
    * be deleted without any safety checks -so if there was relevant
    * data it is at risk of destruction at any point.
@@ -461,7 +461,7 @@
    * Be advised though: the costs of the copy not withstanding,
    * it is a lot easier to have one single type of scheduled copy operation
    * than have copy and touch calls being scheduled.
-   * <p></p>
+   * <p>
    * The duration returned is the time to initiate all copy/delete operations,
    * including any blocking waits for active copies and paged deletes
    * to execute. There may still be outstanding operations
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
index 1d52b0a34e..3c16d87fe1 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
@@ -82,7 +82,7 @@
 
 /**
  * CLI to manage S3Guard Metadata Store.
- * <p></p>
+ * <p>
  * Some management tools invoke this class directly.
  */
 @InterfaceAudience.LimitedPrivate("management tools")
@@ -526,7 +526,6 @@ public int run(String[] args, PrintStream out)
      * Validate the marker options.
      * @param out output stream
      * @param fs filesystem
-     * @param path test path
      * @param marker desired marker option -may be null.
      */
     private void processMarkerOption(final PrintStream out,
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java
index 230f07793d..4ddc5f9478 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/tools/MarkerTool.java
@@ -147,7 +147,7 @@ public final class MarkerTool extends S3GuardTool {
   /**
    * Constant to use when there is no limit on the number of
    * objects listed: {@value}.
-   * <p></p>
+   * <p>
    * The value is 0 and not -1 because it allows for the limit to be
    * set on the command line {@code -limit 0}.
    * The command line parser rejects {@code -limit -1} as the -1
@@ -475,17 +475,23 @@ public String toString() {
           '}';
     }
 
-    /** Exit code to report. */
+    /**
+     * @return Exit code to report.
+     */
     public int getExitCode() {
       return exitCode;
     }
 
-    /** Tracker which did the scan. */
+    /**
+     * @return Tracker which did the scan.
+     */
     public DirMarkerTracker getTracker() {
       return tracker;
     }
 
-    /** Summary of purge. Null if none took place. */
+    /**
+     * @return Summary of purge. Null if none took place.
+     */
     public MarkerPurgeSummary getPurgeSummary() {
       return purgeSummary;
     }
@@ -661,7 +667,7 @@ private String suffix(final int size) {
    * @param path path to scan
    * @param tracker tracker to update
    * @param limit limit of files to scan; -1 for 'unlimited'
-   * @return true if the scan completedly scanned the entire tree
+   * @return true if the scan completely scanned the entire tree
    * @throws IOException IO failure
    */
   @Retries.RetryTranslated
@@ -840,6 +846,7 @@ public void setVerbose(final boolean verbose) {
    * Execute the marker tool, with no checks on return codes.
    *
    * @param scanArgs set of args for the scanner.
+   * @throws IOException IO failure
    * @return the result
    */
   @SuppressWarnings("IOResourceOpenedButNotSafelyClosed")
@@ -853,9 +860,9 @@ public static MarkerTool.ScanResult execMarkerTool(
 
   /**
    * Arguments for the scan.
-   * <p></p>
+   * <p>
    * Uses a builder/argument object because too many arguments were
-   * being created and it was making maintenance harder.
+   * being created, and it was making maintenance harder.
    */
   public static final class ScanArgs {
 
@@ -960,43 +967,71 @@ public static final class ScanArgsBuilder {
     /** Consider only markers in nonauth paths as errors. */
     private boolean nonAuth = false;
 
-    /** Source FS; must be or wrap an S3A FS. */
+    /**
+     * Source FS; must be or wrap an S3A FS.
+     * @param source Source FileSystem
+     * @return the builder class after scanning source FS
+     */
     public ScanArgsBuilder withSourceFS(final FileSystem source) {
       this.sourceFS = source;
       return this;
     }
 
-    /** Path to scan. */
+    /**
+     * Path to scan.
+     * @param p path to scan
+     * @return builder class for method chaining
+     */
     public ScanArgsBuilder withPath(final Path p) {
       this.path = p;
       return this;
     }
 
-    /** Purge? */
+    /**
+     * Should the markers be purged? This is also enabled when using the clean flag on the CLI.
+     * @param d set to purge if true
+     * @return builder class for method chaining
+     */
     public ScanArgsBuilder withDoPurge(final boolean d) {
       this.doPurge = d;
       return this;
     }
 
-    /** Min marker count (ignored on purge). */
+    /**
+     * Min marker count an audit must find (ignored on purge).
+     * @param min Minimum Marker Count (default 0)
+     * @return builder class for method chaining
+     */
    public ScanArgsBuilder withMinMarkerCount(final int min) {
       this.minMarkerCount = min;
       return this;
     }
 
-    /** Max marker count (ignored on purge). */
+    /**
+     * Max marker count an audit must find (ignored on purge).
+     * @param max Maximum Marker Count (default 0)
+     * @return builder class for method chaining
+     */
     public ScanArgsBuilder withMaxMarkerCount(final int max) {
       this.maxMarkerCount = max;
       return this;
     }
 
-    /** Limit of files to scan; 0 for 'unlimited'. */
+    /**
+     * Limit of files to scan; 0 for 'unlimited'.
+     * @param l Limit of files to scan
+     * @return builder class for method chaining
+     */
     public ScanArgsBuilder withLimit(final int l) {
       this.limit = l;
       return this;
     }
 
-    /** Consider only markers in nonauth paths as errors. */
+    /**
+     * Consider only markers in non-authoritative paths as errors.
+     * @param b True if tool should only consider markers in non-authoritative paths
+     * @return builder class for method chaining
+     */
     public ScanArgsBuilder withNonAuth(final boolean b) {
       this.nonAuth = b;
       return this;