diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/ApplicationMaster.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/ApplicationMaster.java
index 2433e9f764..015a5a692a 100644
--- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/ApplicationMaster.java
+++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/ApplicationMaster.java
@@ -90,19 +90,19 @@
  * part of this YARN application. This does not implement any retry/failure
  * handling.
  * TODO: Add proper retry/failure handling
- *
- * <p/>The AM will persist until it has run for a period of time equal to the
+ *
+ * <p>The AM will persist until it has run for a period of time equal to the
  * timeout specified or until the application is killed.
- *
- * <p/>If the NameNode is launched internally, it will upload some information
+ *
+ * <p>If the NameNode is launched internally, it will upload some information
  * onto the remote HDFS instance (i.e., the default FileSystem) about its
  * hostname and ports. This is in the location determined by the
  * {@link DynoConstants#DYNAMOMETER_STORAGE_DIR} and
  * {@link DynoConstants#NN_INFO_FILE_NAME} constants and is in the
  * {@link Properties} file format. This is consumed by this AM as well as the
  * {@link Client} to determine how to contact the NameNode.
- *
- * <p/>Information about the location of the DataNodes is logged by the AM.
+ *
+ * <p>Information about the location of the DataNodes is logged by the AM.
  */
 @InterfaceAudience.Public
 @InterfaceStability.Unstable
@@ -204,6 +204,7 @@ public ApplicationMaster() {
    *
    * @param args Command line args
    * @return Whether init successful and run should be invoked
+   * @throws ParseException on error while parsing options
    */
   public boolean init(String[] args) throws ParseException {
@@ -267,6 +268,9 @@ private void printUsage(Options opts) {
    *
    * @return True if the application completed successfully; false if it exited
    *         unexpectedly, failed, was killed, etc.
+   * @throws YarnException for issues while contacting YARN daemons
+   * @throws IOException for other issues
+   * @throws InterruptedException when the thread is interrupted
    */
   public boolean run() throws YarnException, IOException, InterruptedException {
     LOG.info("Starting ApplicationMaster");
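The javadoc above describes a small hand-off protocol: the internally launched NameNode writes its hostname and ports as a Properties file to a well-known location on the default FileSystem, and the AM and Client read it back. A minimal sketch of the reading side, with a hypothetical path and property key standing in for the real DynoConstants values:

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class NNInfoReaderSketch {
  public static void main(String[] args) throws Exception {
    // The "default FileSystem" named in the javadoc (fs.defaultFS).
    FileSystem fs = FileSystem.get(new Configuration());
    // Hypothetical path and key; the real values are derived from
    // DynoConstants.DYNAMOMETER_STORAGE_DIR and NN_INFO_FILE_NAME.
    Path nnInfoPath = new Path("/user/submitter/.dynamometer/app_1/nn_info.prop");
    Properties nnInfo = new Properties();
    try (FSDataInputStream in = fs.open(nnInfoPath)) {
      nnInfo.load(in); // the file is in java.util.Properties format
    }
    System.out.println("NameNode host: " + nnInfo.getProperty("nn_hostname"));
  }
}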
diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/Client.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/Client.java
index 2cd6cbf45b..22ec2dbf15 100644
--- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/Client.java
+++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/Client.java
@@ -108,24 +108,24 @@
  * for them to be accessed by the YARN app, then launches an
  * {@link ApplicationMaster}, which is responsible for managing the lifetime of
  * the application.
- *
- * <p/>The Dynamometer YARN application starts up the DataNodes of an HDFS
+ *
+ * <p>The Dynamometer YARN application starts up the DataNodes of an HDFS
  * cluster. If the namenode_servicerpc_addr option is specified, it should point
  * to the service RPC address of an existing namenode, which the datanodes will
  * talk to. Else, a namenode will be launched internal to this YARN application.
  * The ApplicationMaster's logs contain links to the NN / DN containers to be
  * able to access their logs. Some of this information is also printed by the
  * client.
- *
- * <p/>The application will store files in the submitting user's home directory
+ *
+ * <p>The application will store files in the submitting user's home directory
  * under a `.dynamometer/applicationID/` folder. This is mostly for uses
  * internal to the application, but if the NameNode is launched through YARN,
  * the NameNode's metrics will also be uploaded to a file `namenode_metrics`
  * within this folder. This file is also accessible as part of the NameNode's
  * logs, but this centralized location is easier to access for subsequent
  * parsing.
- *
- * <p/>If the NameNode is launched internally, this Client will monitor the
+ *
+ * <p>If the NameNode is launched internally, this Client will monitor the
  * status of the NameNode, printing information about its availability as the
  * DataNodes register (e.g., outstanding under-replicated blocks as block
  * reports arrive). If this is configured to launch the workload job, once the
@@ -134,8 +134,8 @@
  * NameNode. Once the workload job completes, the infrastructure application
  * will be shut down. At this time only the audit log replay
  * ({@link AuditReplayMapper}) workload is supported.
- *
- * <p/>If there is no workload job configured, this application will, by
+ *
+ * <p>If there is no workload job configured, this application will, by
  * default, persist indefinitely until killed by YARN. You can specify the
  * timeout option to have it exit automatically after some time. This timeout
  * will be enforced if there is a workload job configured as well.
@@ -248,8 +248,8 @@ public class Client extends Configured implements Tool {
   private Options opts;
 
   /**
-   * @param args
-   *          Command line arguments
+   * @param args Command line arguments
+   * @throws Exception on error
    */
   public static void main(String[] args) throws Exception {
     Client client = new Client(
@@ -386,6 +386,8 @@ private void printUsage() {
    *
    * @param args Parsed command line options
    * @return Whether the init was successful to run the client
+   * @throws ParseException on error while parsing
+   * @throws IOException for other errors
    */
   public boolean init(String[] args)
       throws ParseException, IOException {
@@ -506,6 +508,8 @@ public boolean init(String[] args) throws ParseException, IOException {
    * Main run function for the client.
    *
    * @return true if application completed successfully
+   * @throws IOException for general issues
+   * @throws YarnException for issues while contacting YARN daemons
    */
   public boolean run() throws IOException, YarnException {
diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java
index e5cc705721..e2dceec39e 100644
--- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java
+++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java
@@ -116,9 +116,13 @@ private DynoInfraUtils() {}
    * (checked in that order) is set, use that as the mirror; else use
    * {@value APACHE_DOWNLOAD_MIRROR_DEFAULT}.
    *
+   * @param destinationDir destination directory to save a tarball
    * @param version The version of Hadoop to download, like "2.7.4"
    *                or "3.0.0-beta1"
+   * @param conf configuration
+   * @param log logger instance
    * @return The path to the tarball.
+   * @throws IOException on failure
    */
   public static File fetchHadoopTarball(File destinationDir, String version,
       Configuration conf, Logger log) throws IOException {
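The fetchHadoopTarball signature documented above takes a destination directory, a version string, a Configuration (which can carry the mirror override), and a Logger. A sketch of a call site, using the "2.7.4" version string from the javadoc; the destination path is arbitrary, and the Logger is assumed to be SLF4J's, as is usual in this codebase:

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.dynamometer.DynoInfraUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class FetchTarballSketch {
  private static final Logger LOG =
      LoggerFactory.getLogger(FetchTarballSketch.class);

  public static void main(String[] args) throws Exception {
    // An alternate mirror can also be supplied via the configuration key or
    // environment variable named by APACHE_DOWNLOAD_MIRROR_KEY (see above).
    Configuration conf = new Configuration();
    File tarball = DynoInfraUtils.fetchHadoopTarball(
        new File("/tmp/dyno"), "2.7.4", conf, LOG);
    LOG.info("Fetched tarball to {}", tarball.getAbsolutePath());
  }
}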
diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/CreateFileMapper.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/CreateFileMapper.java
index 8855f44044..24aec936d9 100644
--- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/CreateFileMapper.java
+++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/CreateFileMapper.java
@@ -32,8 +32,8 @@
 /**
  * CreateFileMapper continuously creates 1 byte files for the specified duration
  * to increase the number of file objects on the NN.
- *
- * <p/>Configuration options available:
+ *
+ * <p>Configuration options available:
  * <ul>
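CreateFileMapper's documented behavior, continuously creating 1-byte files until a duration elapses, can be illustrated with a standalone sketch against the FileSystem API. This is an illustration of the idea under assumed paths and timing, not the mapper's actual code:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateTinyFilesSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    long deadline = System.currentTimeMillis() + 60_000L; // e.g. one minute
    int count = 0;
    while (System.currentTimeMillis() < deadline) {
      Path file = new Path("/tmp/dyno-create/file-" + count++);
      try (FSDataOutputStream out = fs.create(file)) {
        out.write(0); // one byte per file, as described above
      }
    }
    System.out.println("Created " + count + " files");
  }
}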
diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditLogDirectParser.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditLogDirectParser.java
--- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditLogDirectParser.java
+++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditLogDirectParser.java
@@ ... @@
- *
- * <p/>By default, this assumes that the audit log is in the default log format
+ *
+ * <p>By default, this assumes that the audit log is in the default log format
  * set up by Hadoop, like:
  * <pre>{@code
  * 1970-01-01 00:00:00,000 INFO FSNamesystem.audit: allowed=true ...
  * }</pre>
diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditLogHiveTableParser.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditLogHiveTableParser.java
index 4d36eff752..abc10f4e33 100644
--- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditLogHiveTableParser.java
+++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditLogHiveTableParser.java
@@ -40,7 +40,9 @@
  * INSERT OVERWRITE DIRECTORY '${outputPath}'
  * SELECT (timestamp - ${startTime}) AS relTime, ugi, cmd, src, dst, ip
  * FROM '${auditLogTableLocation}'
- * WHERE timestamp >= ${startTime} AND timestamp < ${endTime}
+ * WHERE
+ *   timestamp {@literal >=} ${startTime}
+ *   AND timestamp {@literal <} ${endTime}
  * DISTRIBUTE BY src
  * SORT BY relTime ASC;
  * </pre>
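The two parsers above consume the same logical fields (timestamp, ugi, cmd, src, dst, ip), either straight from the default audit log layout or from a Hive table keyed on it. Below is a rough standalone sketch of pulling those fields out of a line in the default format; the regex and the sample line are approximations for illustration, not the pattern AuditLogDirectParser actually uses:

import java.text.SimpleDateFormat;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class AuditLineParseSketch {
  // Rough approximation of the default layout shown above; the real pattern
  // (and its configuration knobs) lives in AuditLogDirectParser.
  private static final Pattern AUDIT = Pattern.compile(
      "^(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2},\\d{3}).*"
          + "allowed=(\\S+)\\s+ugi=(\\S+).*cmd=(\\S+)\\s+src=(\\S+)");

  public static void main(String[] args) throws Exception {
    String line = "1970-01-01 00:00:00,000 INFO FSNamesystem.audit: "
        + "allowed=true ugi=hdfs ip=/127.0.0.1 cmd=open src=/tmp/f dst=null";
    Matcher m = AUDIT.matcher(line);
    if (m.find()) {
      long absTime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS")
          .parse(m.group(1)).getTime();
      // Replay tooling works with offsets relative to the log's start time,
      // like ${startTime} in the Hive query above; zero is a placeholder.
      long startTime = 0L;
      System.out.println((absTime - startTime) + " " + m.group(4)
          + " " + m.group(5));
    }
  }
}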
diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditReplayMapper.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditReplayMapper.java
index 8e279ae992..27beda16a6 100644
--- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditReplayMapper.java
+++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/src/main/java/org/apache/hadoop/tools/dynamometer/workloadgenerator/audit/AuditReplayMapper.java
@@ -57,16 +57,16 @@
  * format of these files is determined by the value of the
  * {@value COMMAND_PARSER_KEY} configuration, which defaults to
  * {@link AuditLogDirectParser}.
- *
- * This generates a number of {@link org.apache.hadoop.mapreduce.Counter}
+ *
+ * <p>This generates a number of {@link org.apache.hadoop.mapreduce.Counter}
  * values which can be used to get information about the replay, including the
  * number of commands replayed, how many of them were "invalid" (threw an
  * exception), how many were "late" (replayed later than they should have been),
  * and the latency (from client perspective) of each command. If there are a
  * large number of "late" commands, you likely need to increase the number of
  * threads used and/or the number of mappers.
- *
- * <p/>By default, commands will be replayed at the same rate as they were
+ *
+ * <p>By default, commands will be replayed at the same rate as they were
  * originally performed. However, a rate factor can be specified via the
  * {@value RATE_FACTOR_KEY} configuration; all of the (relative) timestamps will
  * be divided by this rate factor, effectively changing the rate at which they
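The rate-factor arithmetic described in the final hunk is simple enough to state directly: each command's replay offset is its original relative timestamp divided by the rate factor. A tiny sketch of that math, separate from the actual AuditReplayMapper implementation:

public class RateFactorSketch {
  /**
   * Offset at which to replay a command, given its original offset from the
   * start of the audit log and the configured rate factor.
   */
  static long replayOffsetMs(long originalOffsetMs, double rateFactor) {
    // Dividing the relative timestamp by the rate factor compresses the
    // timeline when rateFactor > 1 (faster replay) and stretches it when
    // rateFactor < 1 (slower replay).
    return (long) (originalOffsetMs / rateFactor);
  }

  public static void main(String[] args) {
    // A command 120s into the log replays 60s in at a rate factor of 2.0.
    System.out.println(replayOffsetMs(120_000L, 2.0)); // prints 60000
  }
}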