HDFS-12345 Add Dynamometer to hadoop-tools, a tool for scale testing the HDFS NameNode with real metadata and workloads. Contributed by Erik Krogen.

Erik Krogen 2019-03-20 14:06:13 -07:00
parent 48d7f00da8
commit ab0b180ddb
71 changed files with 9202 additions and 1 deletions

@@ -148,6 +148,7 @@ run cp -p "${ROOT}/hadoop-client-modules/hadoop-client-runtime/target/hadoop-cli
run cp -p "${ROOT}/hadoop-client-modules/hadoop-client-minicluster/target/hadoop-client-minicluster-${VERSION}.jar" share/hadoop/client/
run copy "${ROOT}/hadoop-tools/hadoop-tools-dist/target/hadoop-tools-dist-${VERSION}" .
run copy "${ROOT}/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-dist/target/hadoop-dynamometer-dist-${VERSION}" .
echo

@@ -0,0 +1,35 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
<id>hadoop-dynamometer-blockgen</id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>${basedir}/src/main/bash</directory>
<outputDirectory>dynamometer-blockgen/bin</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
</fileSets>
</assembly>

@@ -0,0 +1,35 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
<id>hadoop-dynamometer-infra</id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>${basedir}/src/main/bash</directory>
<outputDirectory>dynamometer-infra/bin</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
</fileSets>
</assembly>

@@ -0,0 +1,35 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
<id>hadoop-dynamometer-workload</id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>${basedir}/src/main/bash</directory>
<outputDirectory>dynamometer-workload/bin</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
</fileSets>
</assembly>

@@ -0,0 +1,73 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.3 http://maven.apache.org/xsd/assembly-1.1.3.xsd">
<id>hadoop-dynamometer</id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>../hadoop-dynamometer-blockgen/target</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
<includes>
<include>*-sources.jar</include>
</includes>
</fileSet>
<fileSet>
<directory>../hadoop-dynamometer-blockgen/target/hadoop-dynamometer-blockgen-${project.version}/dynamometer-blockgen</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/dynamometer/dynamometer-blockgen</outputDirectory>
</fileSet>
<fileSet>
<directory>../hadoop-dynamometer-workload/target</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
<includes>
<include>*-sources.jar</include>
</includes>
</fileSet>
<fileSet>
<directory>../hadoop-dynamometer-workload/target/hadoop-dynamometer-workload-${project.version}/dynamometer-workload</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/dynamometer/dynamometer-workload</outputDirectory>
</fileSet>
<fileSet>
<directory>../hadoop-dynamometer-infra/target</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
<includes>
<include>*-sources.jar</include>
</includes>
</fileSet>
<fileSet>
<directory>../hadoop-dynamometer-infra/target/hadoop-dynamometer-infra-${project.version}/dynamometer-infra</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/dynamometer/dynamometer-infra</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<outputDirectory>/share/hadoop/${hadoop.component}/lib</outputDirectory>
<unpack>false</unpack>
<scope>runtime</scope>
<useProjectArtifact>false</useProjectArtifact>
<excludes>
<!-- use slf4j from common to avoid multiple binding warnings -->
<exclude>org.slf4j:slf4j-api</exclude>
<exclude>org.slf4j:slf4j-log4j12</exclude>
</excludes>
</dependencySet>
</dependencySets>
</assembly>

@@ -174,6 +174,28 @@
<directory>../hadoop-sls/target/hadoop-sls-${project.version}/sls</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sls</outputDirectory>
</fileSet>
<fileSet>
<directory>../hadoop-dynamometer/hadoop-dynamometer-blockgen/target</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
<includes>
<include>*-sources.jar</include>
</includes>
</fileSet>
<fileSet>
<directory>../hadoop-dynamometer/hadoop-dynamometer-blockgen/target/hadoop-dynamometer-blockgen-${project.version}/dynamometer-blockgen</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/dynamometer-blockgen</outputDirectory>
</fileSet>
<fileSet>
<directory>../hadoop-resourceestimator/target</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
@@ -209,7 +231,7 @@
<excludes>
<exclude>org.apache.hadoop:hadoop-common</exclude>
<exclude>org.apache.hadoop:hadoop-hdfs</exclude>
<exclude>org.apache.hadoop:hadoop-mapreduce</exclude>
<exclude>org.apache.hadoop:hadoop-client</exclude>
<!-- pipes is native stuff, this just keeps pom from being package-->
<exclude>org.apache.hadoop:hadoop-pipes</exclude>
<!-- use slf4j from common to avoid multiple binding warnings -->

@@ -196,6 +196,7 @@
<argument>${project.version}</argument>
<argument>${project.build.directory}</argument>
<argument>${basedir}/../hadoop-tools</argument>
<!--<argument>${basedir}/../hadoop-tools/hadoop-dynamometer</argument>-->
</arguments>
</configuration>
</execution>

@@ -0,0 +1,131 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-project</artifactId>
<version>3.3.0-SNAPSHOT</version>
<relativePath>../../../hadoop-project</relativePath>
</parent>
<artifactId>hadoop-dynamometer-blockgen</artifactId>
<version>3.3.0-SNAPSHOT</version>
<description>Apache Hadoop Dynamometer Block Listing Generator</description>
<name>Apache Hadoop Dynamometer Block Listing Generator</name>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.10.19</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<configuration>
<attach>true</attach>
</configuration>
<executions>
<execution>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>dist</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-assemblies</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<executions>
<execution>
<id>dist</id>
<phase>prepare-package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<appendAssemblyId>false</appendAssemblyId>
<attach>false</attach>
<finalName>${project.artifactId}-${project.version}</finalName>
<descriptorRefs>
<descriptorRef>hadoop-dynamometer-blockgen</descriptorRef>
</descriptorRefs>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>deplist</id>
<phase>compile</phase>
<goals>
<goal>list</goal>
</goals>
<configuration>
<!-- referenced by a built-in command -->
<outputFile>${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-builtin.txt</outputFile>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if type hadoop &> /dev/null; then
hadoop_cmd="hadoop"
elif type "$HADOOP_COMMON_HOME/bin/hadoop" &> /dev/null; then
hadoop_cmd="$HADOOP_COMMON_HOME/bin/hadoop"
elif type "$HADOOP_HOME/bin/hadoop" &> /dev/null; then
hadoop_cmd="$HADOOP_HOME/bin/hadoop"
else
echo "Unable to find a valid hadoop command to execute; exiting."
exit 1
fi
script_pwd="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."
for f in "${script_pwd}"/lib/*.jar; do
# Skip adding the blockgen JAR since it is added by the `hadoop jar` command
if [[ "$f" != *"dynamometer-blockgen-"* ]]; then
export HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$f"
fi
done
"$hadoop_cmd" jar "${script_pwd}"/lib/dynamometer-blockgen-*.jar \
org.apache.hadoop.tools.dynamometer.blockgenerator.GenerateBlockImagesDriver "$@"

@@ -0,0 +1,136 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.blockgenerator;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;
/**
* This is the MapOutputValue class. It holds the blockId and the block
* generation stamp, which are needed to generate the block images in the
* reducer.
*
* This also stores the replication of the block, but note that it does not
* serialize this value as part of its {@link Writable} interface, and does not
* consider the replication when doing equality / hash comparisons.
*/
public class BlockInfo implements Writable {
public LongWritable getBlockId() {
return blockId;
}
public void setBlockId(LongWritable blockId) {
this.blockId = blockId;
}
public LongWritable getBlockGenerationStamp() {
return blockGenerationStamp;
}
public void setBlockGenerationStamp(LongWritable blockGenerationStamp) {
this.blockGenerationStamp = blockGenerationStamp;
}
public LongWritable getSize() {
return size;
}
public void setSize(LongWritable size) {
this.size = size;
}
public short getReplication() {
return replication;
}
public void setReplication(short replication) {
this.replication = replication;
}
private LongWritable blockId;
private LongWritable blockGenerationStamp;
private LongWritable size;
private transient short replication;
public BlockInfo(BlockInfo blockInfo) {
this.blockId = blockInfo.getBlockId();
this.blockGenerationStamp = blockInfo.getBlockGenerationStamp();
this.size = blockInfo.getSize();
this.replication = blockInfo.getReplication();
}
public BlockInfo() {
this.blockId = new LongWritable();
this.blockGenerationStamp = new LongWritable();
this.size = new LongWritable(1);
}
public BlockInfo(long blockid, long blockgenerationstamp) {
this.blockId = new LongWritable(blockid);
this.blockGenerationStamp = new LongWritable(blockgenerationstamp);
this.size = new LongWritable(1);
}
public BlockInfo(long blockid, long blockgenerationstamp, long size) {
this.blockId = new LongWritable(blockid);
this.blockGenerationStamp = new LongWritable(blockgenerationstamp);
this.size = new LongWritable(size);
}
public BlockInfo(long blockid, long blockgenerationstamp, long size,
short replication) {
this.blockId = new LongWritable(blockid);
this.blockGenerationStamp = new LongWritable(blockgenerationstamp);
this.size = new LongWritable(size);
this.replication = replication;
}
public void write(DataOutput dataOutput) throws IOException {
blockId.write(dataOutput);
blockGenerationStamp.write(dataOutput);
size.write(dataOutput);
}
public void readFields(DataInput dataInput) throws IOException {
blockId.readFields(dataInput);
blockGenerationStamp.readFields(dataInput);
size.readFields(dataInput);
}
@Override
public boolean equals(Object o) {
if (!(o instanceof BlockInfo)) {
return false;
}
BlockInfo blkInfo = (BlockInfo) o;
return blkInfo.getBlockId().equals(this.getBlockId())
&& blkInfo.getBlockGenerationStamp()
.equals(this.getBlockGenerationStamp())
&& blkInfo.getSize().equals(this.getSize());
}
@Override
public int hashCode() {
return blockId.hashCode() + 357 * blockGenerationStamp.hashCode()
+ 9357 * size.hashCode();
}
}
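As the class comment notes, the replication factor is transient: it is not written by write(...) and is not read back by readFields(...). Below is a minimal sketch of that round-trip behavior using Hadoop's DataOutputBuffer and DataInputBuffer; the sketch class itself is illustrative and not part of this commit.

package org.apache.hadoop.tools.dynamometer.blockgenerator;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

/** Illustrative sketch only; not part of this commit. */
public final class BlockInfoRoundTripSketch {
  public static void main(String[] args) throws Exception {
    BlockInfo original = new BlockInfo(6L, 7L, 8L, (short) 3);

    // Serialize: only blockId, generation stamp, and size are written.
    DataOutputBuffer out = new DataOutputBuffer();
    original.write(out);

    // Deserialize into a fresh instance.
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    BlockInfo copy = new BlockInfo();
    copy.readFields(in);

    // equals()/hashCode() ignore replication, so the two compare equal ...
    System.out.println(original.equals(copy)); // true
    // ... but the replication factor does not survive the round trip.
    System.out.println(copy.getReplication()); // 0, not 3
  }
}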

@@ -0,0 +1,140 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.blockgenerator;
import java.net.URI;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* This is the main driver class. It takes in the following arguments:
* <ul>
* <li>Required: input path of the fsimage (in XML form) from the HDFS cluster
* to be simulated</li>
* <li>Required: output path for the generated block image files for each
* Dynamometer DataNode</li>
* <li>Required: number of DataNodes to generate blocks for</li>
* <li>Optional: number of reducers to use for the job (defaults to the number
* of DataNodes)</li>
* </ul>
*/
public class GenerateBlockImagesDriver extends Configured implements Tool {
public static final String FSIMAGE_INPUT_PATH_ARG = "fsimage_input_path";
public static final String BLOCK_IMAGE_OUTPUT_ARG = "block_image_output_dir";
public static final String NUM_REDUCERS_ARG = "num_reducers";
public static final String NUM_DATANODES_ARG = "num_datanodes";
public static final String NUM_DATANODES_KEY = "dyno.blockgen.num.datanodes";
public GenerateBlockImagesDriver(Configuration conf) {
setConf(conf);
}
public int run(String[] args) throws Exception {
Options options = new Options();
options.addOption("h", "help", false, "Shows this message");
options.addOption(OptionBuilder.withArgName("Input path of the XML fsImage")
.hasArg().isRequired(true)
.withDescription("Input path to the Hadoop fsImage XML file (required)")
.create(FSIMAGE_INPUT_PATH_ARG));
options.addOption(OptionBuilder.withArgName("BlockImage output directory")
.hasArg().isRequired(true)
.withDescription("Directory where the generated files containing the "
+ "block listing for each DataNode should be stored (required)")
.create(BLOCK_IMAGE_OUTPUT_ARG));
options.addOption(OptionBuilder.withArgName("Number of reducers").hasArg()
.isRequired(false)
.withDescription(
"Number of reducers for this job (defaults to number of datanodes)")
.create(NUM_REDUCERS_ARG));
options.addOption(OptionBuilder.withArgName("Number of datanodes").hasArg()
.isRequired(true)
.withDescription("Number of DataNodes to create blocks for (required)")
.create(NUM_DATANODES_ARG));
CommandLineParser parser = new PosixParser();
CommandLine cli = parser.parse(options, args);
if (cli.hasOption("h")) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp(200, "hadoop jar dynamometer-*.jar "
+ "org.apache.hadoop.tools.dynamometer.blockgenerator."
+ "GenerateBlockImagesDriver [options]",
null, options, null);
return 0;
}
String fsImageInputPath = cli.getOptionValue(FSIMAGE_INPUT_PATH_ARG);
String blockImageOutputDir = cli.getOptionValue(BLOCK_IMAGE_OUTPUT_ARG);
int numDataNodes = Integer.parseInt(cli.getOptionValue(NUM_DATANODES_ARG));
int numReducers = Integer.parseInt(
cli.getOptionValue(NUM_REDUCERS_ARG, String.valueOf(numDataNodes)));
FileSystem fs = FileSystem.get(new URI(blockImageOutputDir), getConf());
Job job = Job.getInstance(getConf(), "Create blocksImages for Dynamometer");
FileInputFormat.setInputPaths(job, new Path(fsImageInputPath));
Path blockImagesDir = new Path(blockImageOutputDir);
fs.delete(blockImagesDir, true);
FileOutputFormat.setOutputPath(job, blockImagesDir);
job.getConfiguration().setInt(NUM_DATANODES_KEY, numDataNodes);
job.setJarByClass(GenerateBlockImagesDriver.class);
job.setInputFormatClass(NoSplitTextInputFormat.class);
job.setNumReduceTasks(numReducers);
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
job.setMapperClass(XMLParserMapper.class);
job.setReducerClass(GenerateDNBlockInfosReducer.class);
job.setMapOutputKeyClass(IntWritable.class);
job.setMapOutputValueClass(BlockInfo.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(Text.class);
boolean success = job.waitForCompletion(true);
return success ? 0 : 1;
}
public static void main(String[] args) throws Exception {
GenerateBlockImagesDriver driver = new GenerateBlockImagesDriver(
new Configuration());
System.exit(ToolRunner.run(driver, args));
}
/** A simple text input format that doesn't allow splitting of files. */
public static class NoSplitTextInputFormat extends TextInputFormat {
@Override
public boolean isSplitable(JobContext context, Path file) {
return false;
}
}
}
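The javadoc above lists the required and optional arguments, and main() simply hands them to ToolRunner. A minimal sketch of invoking the driver programmatically with those same flags follows; the two paths and the DataNode count are placeholder assumptions, not part of this commit.

package org.apache.hadoop.tools.dynamometer.blockgenerator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;

/** Illustrative sketch only; not part of this commit. */
public final class RunBlockGenSketch {
  public static void main(String[] args) throws Exception {
    GenerateBlockImagesDriver driver =
        new GenerateBlockImagesDriver(new Configuration());
    // Same flags the CLI help above documents; both paths are assumptions.
    int exitCode = ToolRunner.run(driver, new String[] {
        "-" + GenerateBlockImagesDriver.FSIMAGE_INPUT_PATH_ARG, "/tmp/fsimage.xml",
        "-" + GenerateBlockImagesDriver.BLOCK_IMAGE_OUTPUT_ARG, "/tmp/blockLists",
        "-" + GenerateBlockImagesDriver.NUM_DATANODES_ARG, "40"
    });
    System.exit(exitCode);
  }
}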

@@ -0,0 +1,99 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.blockgenerator;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This Reducer class generates a simple text file for each DN, listing the
* blocks to be generated.
*
* Input: {@link BlockInfo}'s from {@link XMLParserMapper}
*
* Output: A text file named dni-XXX, where i is the ID of the DN and XXX is
* a reducer ID. Each line in the file is in the format:
* blockID,blockGenStamp,blockSize
*/
public class GenerateDNBlockInfosReducer
extends Reducer<IntWritable, BlockInfo, NullWritable, Text> {
private static final Logger LOG = LoggerFactory
.getLogger(GenerateDNBlockInfosReducer.class);
private MultipleOutputs<NullWritable, Text> multiOutputs;
@Override
public void setup(
Reducer<IntWritable, BlockInfo, NullWritable, Text>.Context context) {
multiOutputs = new MultipleOutputs<>(context);
}
@Override
public void cleanup(Context context)
throws IOException, InterruptedException {
multiOutputs.close();
multiOutputs = null;
}
@Override
public void reduce(IntWritable key, Iterable<BlockInfo> values,
Context context) throws IOException, InterruptedException {
long blockIndex = 0;
int datanodeId = key.get();
String dnFile = "dn" + datanodeId + "-a-"
+ context.getTaskAttemptID().getId();
Iterator<BlockInfo> it = values.iterator();
long startTimestamp = System.currentTimeMillis();
long endTimestamp;
Path baseOutputPath = FileOutputFormat.getOutputPath(context);
String fullPath = new Path(baseOutputPath, dnFile).toString();
Text out = new Text();
while (it.hasNext()) {
BlockInfo blockInfo = new BlockInfo(it.next());
String blockLine = blockInfo.getBlockId() + ","
+ blockInfo.getBlockGenerationStamp() + "," + blockInfo.getSize();
out.set(blockLine);
multiOutputs.write(NullWritable.get(), out, fullPath);
blockIndex++;
// Report progress for every 1000 blocks
if (blockIndex % 1000 == 0) {
context.progress();
endTimestamp = System.currentTimeMillis();
System.out.println("Time taken to process 1000 records in ms:"
+ (endTimestamp - startTimestamp));
startTimestamp = endTimestamp;
}
}
LOG.info("Number of blocks processed:" + blockIndex);
}
}
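Each line written above follows the blockID,blockGenStamp,blockSize format described in the class comment. A minimal sketch of how a consumer of these per-DataNode files might turn a line back into a BlockInfo; the helper class is illustrative and not part of this commit, and the sample values are taken from the fsimage test resource further below.

package org.apache.hadoop.tools.dynamometer.blockgenerator;

/** Illustrative sketch only; not part of this commit. */
final class BlockLineParseSketch {
  /** Parse one "blockID,blockGenStamp,blockSize" line from a dn file. */
  static BlockInfo parseBlockLine(String line) {
    String[] fields = line.split(",");
    return new BlockInfo(Long.parseLong(fields[0]),
        Long.parseLong(fields[1]), Long.parseLong(fields[2]));
  }

  public static void main(String[] args) {
    BlockInfo info = parseBlockLine("1073751825,11001,4");
    System.out.println(info.getBlockId() + " / "
        + info.getBlockGenerationStamp() + " / " + info.getSize());
  }
}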

@@ -0,0 +1,154 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.blockgenerator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* This class parses an fsimage file in XML format. It accepts the file
* line-by-line and maintains an internal state machine to keep track of
* contextual information. A single parser must process the entire file with the
* lines in the order they appear in the original file.
*
* A file may be spread across multiple lines, so we need to track the
* replication of the file we are currently processing to be aware of what the
* replication factor is for each block we encounter. This is why we require a
* single mapper.
*
* The format is illustrated below (line breaks for readability):
* <pre>{@code
* <inode><id>inode_ID</id> <type>inode_type</type>
* <replication>inode_replication</replication> [file attributes] <blocks>
* <block><id>XXX</id><genstamp>XXX</genstamp><numBytes>XXX</numBytes></block>
* </blocks> </inode>
* }</pre>
*
* This is true in both Hadoop 2 and 3.
*/
class XMLParser {
private static final Pattern BLOCK_PATTERN = Pattern.compile("<block>"
+ "<id>(\\d+)</id>"
+ "<genstamp>(\\d+)</genstamp>"
+ "<numBytes>(\\d+)</numBytes>"
+ "</block>");
private State currentState = State.DEFAULT;
private short currentReplication;
/**
* Accept a single line of the XML file, and return a {@link BlockInfo} for
* any blocks contained within that line. Update internal state dependent on
* other XML values seen, e.g. the beginning of a file.
*
* @param line The XML line to parse.
* @return {@code BlockInfo}s for any blocks found.
*/
List<BlockInfo> parseLine(String line) throws IOException {
if (line.contains("<inode>")) {
transitionTo(State.INODE);
}
if (line.contains("<type>FILE</type>")) {
transitionTo(State.FILE);
}
List<String> replicationStrings = valuesFromXMLString(line, "replication");
if (!replicationStrings.isEmpty()) {
if (replicationStrings.size() > 1) {
throw new IOException(String.format("Found %s replication strings",
replicationStrings.size()));
}
transitionTo(State.FILE_WITH_REPLICATION);
currentReplication = Short.parseShort(replicationStrings.get(0));
}
Matcher blockMatcher = BLOCK_PATTERN.matcher(line);
List<BlockInfo> blockInfos = new ArrayList<>();
while (blockMatcher.find()) {
if (currentState != State.FILE_WITH_REPLICATION) {
throw new IOException(
"Found a block string when in state: " + currentState);
}
long id = Long.parseLong(blockMatcher.group(1));
long gs = Long.parseLong(blockMatcher.group(2));
long size = Long.parseLong(blockMatcher.group(3));
blockInfos.add(new BlockInfo(id, gs, size, currentReplication));
}
if (line.contains("</inode>")) {
transitionTo(State.DEFAULT);
}
return blockInfos;
}
/**
* Attempt to transition to another state.
*
* @param nextState The new state to transition to.
* @throws IOException If the transition from the current state to
* {@code nextState} is not allowed.
*/
private void transitionTo(State nextState) throws IOException {
if (currentState.transitionAllowed(nextState)) {
currentState = nextState;
} else {
throw new IOException("State transition not allowed; from " + currentState
+ " to " + nextState);
}
}
/**
* @param xml An XML string
* @param field The field whose value(s) should be extracted
* @return List of the field's values.
*/
private static List<String> valuesFromXMLString(String xml, String field) {
Matcher m = Pattern.compile("<" + field + ">(.+?)</" + field + ">")
.matcher(xml);
List<String> found = new ArrayList<>();
while (m.find()) {
found.add(m.group(1));
}
return found;
}
private enum State {
DEFAULT, INODE, FILE, FILE_WITH_REPLICATION;
private final Set<State> allowedTransitions = new HashSet<>();
static {
DEFAULT.addTransitions(DEFAULT, INODE);
INODE.addTransitions(DEFAULT, FILE);
FILE.addTransitions(DEFAULT, FILE_WITH_REPLICATION);
FILE_WITH_REPLICATION.addTransitions(DEFAULT);
}
private void addTransitions(State... nextState) {
allowedTransitions.addAll(Arrays.asList(nextState));
}
boolean transitionAllowed(State nextState) {
return allowedTransitions.contains(nextState);
}
}
}
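A minimal sketch of feeding lines through the parser in file order, mirroring the inode format illustrated in the class comment. Since XMLParser and parseLine are package-private, the sketch assumes it lives in the same package; the class name and the inode values are illustrative, not part of this commit.

package org.apache.hadoop.tools.dynamometer.blockgenerator;

/** Illustrative sketch only; not part of this commit. */
final class XMLParserSketch {
  public static void main(String[] args) throws Exception {
    XMLParser parser = new XMLParser();
    String[] lines = {
        "<inode><id>16400</id><type>FILE</type>",
        "<replication>3</replication>",
        "<blocks><block><id>6</id><genstamp>7</genstamp>"
            + "<numBytes>8</numBytes></block></blocks>",
        "</inode>"
    };
    // Lines must be fed in file order so the parser's state machine can
    // track which replication factor applies to the blocks it finds.
    for (String line : lines) {
      for (BlockInfo block : parser.parseLine(line)) {
        System.out.println(block.getBlockId() + " replication="
            + block.getReplication());
      }
    }
  }
}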

@@ -0,0 +1,81 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.blockgenerator;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This Mapper class generates a list of {@link BlockInfo}'s from a given
* fsimage.
*
* Input: fsimage in XML format. It should be generated using
* {@code org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer}.
*
* Output: list of all {@link BlockInfo}'s
*/
public class XMLParserMapper
extends Mapper<LongWritable, Text, IntWritable, BlockInfo> {
private static final Logger LOG =
LoggerFactory.getLogger(XMLParserMapper.class);
@Override
public void setup(Mapper.Context context) {
Configuration conf = context.getConfiguration();
numDataNodes = conf.getInt(GenerateBlockImagesDriver.NUM_DATANODES_KEY, -1);
parser = new XMLParser();
}
// Block indexes should be generated serially (the input is unsplittable, so a single mapper assigns them)
private int blockIndex = 0;
private int numDataNodes;
private XMLParser parser;
/**
* Read the input XML file line by line, and generate list of blocks. The
* actual parsing logic is handled by {@link XMLParser}. This mapper just
* delegates to that class and then writes the blocks to the corresponding
* index to be processed by reducers.
*/
@Override
public void map(LongWritable lineNum, Text line,
Mapper<LongWritable, Text, IntWritable, BlockInfo>.Context context)
throws IOException, InterruptedException {
List<BlockInfo> blockInfos = parser.parseLine(line.toString());
for (BlockInfo blockInfo : blockInfos) {
for (short i = 0; i < blockInfo.getReplication(); i++) {
context.write(new IntWritable((blockIndex + i) % numDataNodes),
blockInfo);
}
blockIndex++;
if (blockIndex % 1000000 == 0) {
LOG.info("Processed " + blockIndex + " blocks");
}
}
}
}

@@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Tools to help generate block files as input to Dynamometer.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
package org.apache.hadoop.tools.dynamometer.blockgenerator;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

@@ -0,0 +1,89 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.blockgenerator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.junit.Assert.assertEquals;
/** Tests for block generation via {@link GenerateBlockImagesDriver}. */
public class TestBlockGen {
private static final Logger LOG = LoggerFactory.getLogger(TestBlockGen.class);
private MiniDFSCluster dfsCluster;
private FileSystem fs;
private static final String FS_IMAGE_NAME = "fsimage_0000000000000061740.xml";
private static final String BLOCK_LIST_OUTPUT_DIR_NAME = "blockLists";
private Path tmpPath;
@Before
public void setup() throws Exception {
Configuration conf = new Configuration();
dfsCluster = new MiniDFSCluster.Builder(conf).build();
dfsCluster.waitActive();
LOG.info("Started MiniDFSCluster");
fs = dfsCluster.getFileSystem();
FileSystem.setDefaultUri(conf, fs.getUri());
tmpPath = fs.makeQualified(new Path("/tmp"));
fs.mkdirs(tmpPath);
String fsImageFile = this.getClass().getClassLoader()
.getResource(FS_IMAGE_NAME).getPath();
fs.copyFromLocalFile(new Path(fsImageFile),
new Path(tmpPath, FS_IMAGE_NAME));
}
@After
public void cleanUp() {
dfsCluster.shutdown();
}
@Test
public void testBlockGen() throws Exception {
LOG.info("Started test");
int datanodeCount = 40;
GenerateBlockImagesDriver driver = new GenerateBlockImagesDriver(
new Configuration());
driver.run(
new String[] {"-" + GenerateBlockImagesDriver.FSIMAGE_INPUT_PATH_ARG,
new Path(tmpPath, FS_IMAGE_NAME).toString(),
"-" + GenerateBlockImagesDriver.BLOCK_IMAGE_OUTPUT_ARG,
new Path(tmpPath, BLOCK_LIST_OUTPUT_DIR_NAME).toString(),
"-" + GenerateBlockImagesDriver.NUM_DATANODES_ARG,
String.valueOf(datanodeCount)});
for (int i = 0; i < datanodeCount; i++) {
final int idx = i;
assertEquals(1, fs.listStatus(
new Path(tmpPath, BLOCK_LIST_OUTPUT_DIR_NAME),
(path) -> path.getName().startsWith(String.format("dn%d-", idx))
).length);
}
}
}

@@ -0,0 +1,70 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.blockgenerator;
import java.util.HashMap;
import java.util.Map;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
/** Tests for {@link XMLParser}. */
public class TestXMLParser {
/**
* Tests whether {@link XMLParser} correctly parses an XML fsimage file into
* {@link BlockInfo}'s. Note that a single file's inode may span multiple lines.
*/
@Test
public void testBlocksFromLine() throws Exception {
String[] lines = {
"<INodeSection><lastInodeId>1"
+ "</lastInodeId><inode><id>2</id><type>FILE</type>"
+ "<name>fake-file</name>"
+ "<replication>3</replication><mtime>3</mtime>"
+ "<atime>4</atime>" + "<perferredBlockSize>5</perferredBlockSize>"
+ "<permission>hdfs:hdfs:rw-------</permission>"
+ "<blocks><block><id>6</id><genstamp>7</genstamp>"
+ "<numBytes>8</numBytes></block>"
+ "<block><id>9</id><genstamp>10</genstamp>"
+ "<numBytes>11</numBytes></block></inode>",
"<inode><type>DIRECTORY</type></inode>", "<inode><type>FILE</type>",
"<replication>12</replication>",
"<blocks><block><id>13</id><genstamp>14</genstamp>"
+ "<numBytes>15</numBytes></block>",
"</inode>"
};
Map<BlockInfo, Short> expectedBlockCount = new HashMap<>();
expectedBlockCount.put(new BlockInfo(6, 7, 8), (short) 3);
expectedBlockCount.put(new BlockInfo(9, 10, 11), (short) 3);
expectedBlockCount.put(new BlockInfo(13, 14, 15), (short) 12);
final Map<BlockInfo, Short> actualBlockCount = new HashMap<>();
XMLParser parser = new XMLParser();
for (String line : lines) {
for (BlockInfo info : parser.parseLine(line)) {
actualBlockCount.put(info, info.getReplication());
}
}
for (Map.Entry<BlockInfo, Short> expect : expectedBlockCount.entrySet()) {
assertEquals(expect.getValue(), actualBlockCount.get(expect.getKey()));
}
}
}

@@ -0,0 +1,553 @@
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<fsimage><NameSection>
<genstampV1>1000</genstampV1><genstampV2>11100</genstampV2><genstampV1Limit>0</genstampV1Limit><lastAllocatedBlockId>1073751924</lastAllocatedBlockId><txid>61740</txid></NameSection>
<INodeSection><lastInodeId>26700</lastInodeId><inode><id>16385</id><type>DIRECTORY</type><name></name><mtime>1494453394091</mtime><permission>hdfs:hdfs:rwxr-xr-x</permission><nsquota>9223372036854775807</nsquota><dsquota>-1</dsquota></inode>
<inode><id>16386</id><type>DIRECTORY</type><name>tmp</name><mtime>1494453775668</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>16387</id><type>DIRECTORY</type><name>data</name><mtime>1494453387057</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>16388</id><type>DIRECTORY</type><name>user</name><mtime>1494453399239</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>16389</id><type>DIRECTORY</type><name>hdfs</name><mtime>1494453399239</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>16390</id><type>DIRECTORY</type><name>files</name><mtime>1494614176953</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26491</id><type>DIRECTORY</type><name>0</name><mtime>1494614175397</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26492</id><type>DIRECTORY</type><name>0</name><mtime>1494614175198</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26493</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175183</mtime><atime>1494614175049</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751825</id><genstamp>11001</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26494</id><type>DIRECTORY</type><name>1</name><mtime>1494614175224</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26495</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175222</mtime><atime>1494614175212</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751826</id><genstamp>11002</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26496</id><type>DIRECTORY</type><name>2</name><mtime>1494614175259</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26497</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175257</mtime><atime>1494614175234</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751827</id><genstamp>11003</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26498</id><type>DIRECTORY</type><name>3</name><mtime>1494614175280</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26499</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175278</mtime><atime>1494614175269</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751828</id><genstamp>11004</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26500</id><type>DIRECTORY</type><name>4</name><mtime>1494614175302</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26501</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175299</mtime><atime>1494614175291</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751829</id><genstamp>11005</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26502</id><type>DIRECTORY</type><name>5</name><mtime>1494614175323</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26503</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175321</mtime><atime>1494614175312</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751830</id><genstamp>11006</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26504</id><type>DIRECTORY</type><name>6</name><mtime>1494614175345</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26505</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175343</mtime><atime>1494614175333</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751831</id><genstamp>11007</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26506</id><type>DIRECTORY</type><name>7</name><mtime>1494614175366</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26507</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175363</mtime><atime>1494614175355</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751832</id><genstamp>11008</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26508</id><type>DIRECTORY</type><name>8</name><mtime>1494614175387</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26509</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175385</mtime><atime>1494614175376</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751833</id><genstamp>11009</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26510</id><type>DIRECTORY</type><name>9</name><mtime>1494614175413</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26511</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175410</mtime><atime>1494614175402</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751834</id><genstamp>11010</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26512</id><type>DIRECTORY</type><name>1</name><mtime>1494614175626</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26513</id><type>DIRECTORY</type><name>0</name><mtime>1494614175447</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26514</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175445</mtime><atime>1494614175437</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751835</id><genstamp>11011</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26515</id><type>DIRECTORY</type><name>1</name><mtime>1494614175467</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26516</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175464</mtime><atime>1494614175456</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751836</id><genstamp>11012</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26517</id><type>DIRECTORY</type><name>2</name><mtime>1494614175486</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26518</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175484</mtime><atime>1494614175475</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751837</id><genstamp>11013</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26519</id><type>DIRECTORY</type><name>3</name><mtime>1494614175514</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26520</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175512</mtime><atime>1494614175496</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751838</id><genstamp>11014</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26521</id><type>DIRECTORY</type><name>4</name><mtime>1494614175533</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26522</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175531</mtime><atime>1494614175523</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751839</id><genstamp>11015</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26523</id><type>DIRECTORY</type><name>5</name><mtime>1494614175552</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26524</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175549</mtime><atime>1494614175540</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751840</id><genstamp>11016</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26525</id><type>DIRECTORY</type><name>6</name><mtime>1494614175574</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26526</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175572</mtime><atime>1494614175563</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751841</id><genstamp>11017</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26527</id><type>DIRECTORY</type><name>7</name><mtime>1494614175603</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26528</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175592</mtime><atime>1494614175583</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751842</id><genstamp>11018</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26529</id><type>DIRECTORY</type><name>8</name><mtime>1494614175622</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26530</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175620</mtime><atime>1494614175612</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751843</id><genstamp>11019</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26531</id><type>DIRECTORY</type><name>9</name><mtime>1494614175643</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26532</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175640</mtime><atime>1494614175631</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751844</id><genstamp>11020</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26533</id><type>DIRECTORY</type><name>2</name><mtime>1494614175829</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26534</id><type>DIRECTORY</type><name>0</name><mtime>1494614175669</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26535</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175666</mtime><atime>1494614175658</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751845</id><genstamp>11021</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26536</id><type>DIRECTORY</type><name>1</name><mtime>1494614175688</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26537</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175686</mtime><atime>1494614175678</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751846</id><genstamp>11022</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26538</id><type>DIRECTORY</type><name>2</name><mtime>1494614175708</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26539</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175705</mtime><atime>1494614175697</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751847</id><genstamp>11023</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26540</id><type>DIRECTORY</type><name>3</name><mtime>1494614175727</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26541</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175725</mtime><atime>1494614175716</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751848</id><genstamp>11024</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26542</id><type>DIRECTORY</type><name>4</name><mtime>1494614175746</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26543</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175744</mtime><atime>1494614175736</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751849</id><genstamp>11025</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26544</id><type>DIRECTORY</type><name>5</name><mtime>1494614175772</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26545</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175770</mtime><atime>1494614175755</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751850</id><genstamp>11026</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26546</id><type>DIRECTORY</type><name>6</name><mtime>1494614175790</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26547</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175788</mtime><atime>1494614175780</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751851</id><genstamp>11027</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26548</id><type>DIRECTORY</type><name>7</name><mtime>1494614175808</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26549</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175806</mtime><atime>1494614175798</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751852</id><genstamp>11028</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26550</id><type>DIRECTORY</type><name>8</name><mtime>1494614175826</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26551</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175823</mtime><atime>1494614175816</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751853</id><genstamp>11029</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26552</id><type>DIRECTORY</type><name>9</name><mtime>1494614175853</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26553</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175842</mtime><atime>1494614175834</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751854</id><genstamp>11030</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26554</id><type>DIRECTORY</type><name>3</name><mtime>1494614176026</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26555</id><type>DIRECTORY</type><name>0</name><mtime>1494614175875</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26556</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175873</mtime><atime>1494614175865</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751855</id><genstamp>11031</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26557</id><type>DIRECTORY</type><name>1</name><mtime>1494614175893</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26558</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175891</mtime><atime>1494614175883</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751856</id><genstamp>11032</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26559</id><type>DIRECTORY</type><name>2</name><mtime>1494614175911</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26560</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175908</mtime><atime>1494614175901</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751857</id><genstamp>11033</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26561</id><type>DIRECTORY</type><name>3</name><mtime>1494614175930</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26562</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175927</mtime><atime>1494614175919</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751858</id><genstamp>11034</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26563</id><type>DIRECTORY</type><name>4</name><mtime>1494614175947</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26564</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175946</mtime><atime>1494614175938</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751859</id><genstamp>11035</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26565</id><type>DIRECTORY</type><name>5</name><mtime>1494614175964</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26566</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175962</mtime><atime>1494614175955</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751860</id><genstamp>11036</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26567</id><type>DIRECTORY</type><name>6</name><mtime>1494614175980</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26568</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175978</mtime><atime>1494614175971</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751861</id><genstamp>11037</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26569</id><type>DIRECTORY</type><name>7</name><mtime>1494614176000</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26570</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614175997</mtime><atime>1494614175989</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751862</id><genstamp>11038</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26571</id><type>DIRECTORY</type><name>8</name><mtime>1494614176018</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26572</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176016</mtime><atime>1494614176009</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751863</id><genstamp>11039</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26573</id><type>DIRECTORY</type><name>9</name><mtime>1494614176040</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26574</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176038</mtime><atime>1494614176031</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751864</id><genstamp>11040</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26575</id><type>DIRECTORY</type><name>4</name><mtime>1494614176214</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26576</id><type>DIRECTORY</type><name>0</name><mtime>1494614176063</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26577</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176061</mtime><atime>1494614176053</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751865</id><genstamp>11041</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26578</id><type>DIRECTORY</type><name>1</name><mtime>1494614176081</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26579</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176080</mtime><atime>1494614176071</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751866</id><genstamp>11042</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26580</id><type>DIRECTORY</type><name>2</name><mtime>1494614176101</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26581</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176098</mtime><atime>1494614176089</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751867</id><genstamp>11043</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26582</id><type>DIRECTORY</type><name>3</name><mtime>1494614176118</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26583</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176115</mtime><atime>1494614176108</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751868</id><genstamp>11044</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26584</id><type>DIRECTORY</type><name>4</name><mtime>1494614176136</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26585</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176134</mtime><atime>1494614176127</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751869</id><genstamp>11045</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26586</id><type>DIRECTORY</type><name>5</name><mtime>1494614176153</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26587</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176151</mtime><atime>1494614176144</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751870</id><genstamp>11046</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26588</id><type>DIRECTORY</type><name>6</name><mtime>1494614176178</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26589</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176176</mtime><atime>1494614176160</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751871</id><genstamp>11047</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26590</id><type>DIRECTORY</type><name>7</name><mtime>1494614176194</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26591</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176193</mtime><atime>1494614176186</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751872</id><genstamp>11048</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26592</id><type>DIRECTORY</type><name>8</name><mtime>1494614176211</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26593</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176209</mtime><atime>1494614176201</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751873</id><genstamp>11049</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26594</id><type>DIRECTORY</type><name>9</name><mtime>1494614176228</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26595</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176226</mtime><atime>1494614176219</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751874</id><genstamp>11050</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26596</id><type>DIRECTORY</type><name>5</name><mtime>1494614176388</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26597</id><type>DIRECTORY</type><name>0</name><mtime>1494614176249</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26598</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176247</mtime><atime>1494614176240</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751875</id><genstamp>11051</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26599</id><type>DIRECTORY</type><name>1</name><mtime>1494614176266</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26600</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176264</mtime><atime>1494614176257</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751876</id><genstamp>11052</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26601</id><type>DIRECTORY</type><name>2</name><mtime>1494614176288</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26602</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176286</mtime><atime>1494614176274</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751877</id><genstamp>11053</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26603</id><type>DIRECTORY</type><name>3</name><mtime>1494614176304</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26604</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176303</mtime><atime>1494614176295</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751878</id><genstamp>11054</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26605</id><type>DIRECTORY</type><name>4</name><mtime>1494614176320</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26606</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176318</mtime><atime>1494614176311</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751879</id><genstamp>11055</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26607</id><type>DIRECTORY</type><name>5</name><mtime>1494614176336</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26608</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176334</mtime><atime>1494614176327</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751880</id><genstamp>11056</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26609</id><type>DIRECTORY</type><name>6</name><mtime>1494614176354</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26610</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176351</mtime><atime>1494614176343</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751881</id><genstamp>11057</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26611</id><type>DIRECTORY</type><name>7</name><mtime>1494614176369</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26612</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176368</mtime><atime>1494614176360</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751882</id><genstamp>11058</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26613</id><type>DIRECTORY</type><name>8</name><mtime>1494614176385</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26614</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176383</mtime><atime>1494614176376</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751883</id><genstamp>11059</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26615</id><type>DIRECTORY</type><name>9</name><mtime>1494614176401</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26616</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176399</mtime><atime>1494614176392</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751884</id><genstamp>11060</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26617</id><type>DIRECTORY</type><name>6</name><mtime>1494614176584</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26618</id><type>DIRECTORY</type><name>0</name><mtime>1494614176432</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26619</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176421</mtime><atime>1494614176413</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751885</id><genstamp>11061</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26620</id><type>DIRECTORY</type><name>1</name><mtime>1494614176450</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26621</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176448</mtime><atime>1494614176440</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751886</id><genstamp>11062</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26622</id><type>DIRECTORY</type><name>2</name><mtime>1494614176468</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26623</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176465</mtime><atime>1494614176457</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751887</id><genstamp>11063</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26624</id><type>DIRECTORY</type><name>3</name><mtime>1494614176486</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26625</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176483</mtime><atime>1494614176475</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751888</id><genstamp>11064</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26626</id><type>DIRECTORY</type><name>4</name><mtime>1494614176503</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26627</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176501</mtime><atime>1494614176493</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751889</id><genstamp>11065</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26628</id><type>DIRECTORY</type><name>5</name><mtime>1494614176519</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26629</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176517</mtime><atime>1494614176511</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751890</id><genstamp>11066</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26630</id><type>DIRECTORY</type><name>6</name><mtime>1494614176550</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26631</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176540</mtime><atime>1494614176526</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751891</id><genstamp>11067</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26632</id><type>DIRECTORY</type><name>7</name><mtime>1494614176566</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26633</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176564</mtime><atime>1494614176557</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751892</id><genstamp>11068</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26634</id><type>DIRECTORY</type><name>8</name><mtime>1494614176580</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26635</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176578</mtime><atime>1494614176572</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751893</id><genstamp>11069</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26636</id><type>DIRECTORY</type><name>9</name><mtime>1494614176596</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26637</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176594</mtime><atime>1494614176588</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751894</id><genstamp>11070</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26638</id><type>DIRECTORY</type><name>7</name><mtime>1494614176756</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26639</id><type>DIRECTORY</type><name>0</name><mtime>1494614176616</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26640</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176613</mtime><atime>1494614176606</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751895</id><genstamp>11071</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26641</id><type>DIRECTORY</type><name>1</name><mtime>1494614176631</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26642</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176629</mtime><atime>1494614176622</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751896</id><genstamp>11072</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26643</id><type>DIRECTORY</type><name>2</name><mtime>1494614176647</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26644</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176645</mtime><atime>1494614176638</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751897</id><genstamp>11073</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26645</id><type>DIRECTORY</type><name>3</name><mtime>1494614176662</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26646</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176660</mtime><atime>1494614176653</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751898</id><genstamp>11074</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26647</id><type>DIRECTORY</type><name>4</name><mtime>1494614176678</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26648</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176676</mtime><atime>1494614176669</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751899</id><genstamp>11075</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26649</id><type>DIRECTORY</type><name>5</name><mtime>1494614176703</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26650</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176701</mtime><atime>1494614176694</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751900</id><genstamp>11076</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26651</id><type>DIRECTORY</type><name>6</name><mtime>1494614176719</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26652</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176717</mtime><atime>1494614176710</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751901</id><genstamp>11077</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26653</id><type>DIRECTORY</type><name>7</name><mtime>1494614176736</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26654</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176734</mtime><atime>1494614176726</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751902</id><genstamp>11078</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26655</id><type>DIRECTORY</type><name>8</name><mtime>1494614176753</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26656</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176751</mtime><atime>1494614176743</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751903</id><genstamp>11079</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26657</id><type>DIRECTORY</type><name>9</name><mtime>1494614176770</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26658</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176768</mtime><atime>1494614176761</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751904</id><genstamp>11080</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26659</id><type>DIRECTORY</type><name>8</name><mtime>1494614176936</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26660</id><type>DIRECTORY</type><name>0</name><mtime>1494614176791</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26661</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176789</mtime><atime>1494614176782</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751905</id><genstamp>11081</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26662</id><type>DIRECTORY</type><name>1</name><mtime>1494614176822</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26663</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176820</mtime><atime>1494614176798</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751906</id><genstamp>11082</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26664</id><type>DIRECTORY</type><name>2</name><mtime>1494614176837</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26665</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176835</mtime><atime>1494614176828</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751907</id><genstamp>11083</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26666</id><type>DIRECTORY</type><name>3</name><mtime>1494614176853</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26667</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176851</mtime><atime>1494614176844</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751908</id><genstamp>11084</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26668</id><type>DIRECTORY</type><name>4</name><mtime>1494614176868</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26669</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176866</mtime><atime>1494614176859</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751909</id><genstamp>11085</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26670</id><type>DIRECTORY</type><name>5</name><mtime>1494614176886</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26671</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176883</mtime><atime>1494614176875</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751910</id><genstamp>11086</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26672</id><type>DIRECTORY</type><name>6</name><mtime>1494614176901</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26673</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176900</mtime><atime>1494614176892</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751911</id><genstamp>11087</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26674</id><type>DIRECTORY</type><name>7</name><mtime>1494614176917</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26675</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176915</mtime><atime>1494614176908</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751912</id><genstamp>11088</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26676</id><type>DIRECTORY</type><name>8</name><mtime>1494614176933</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26677</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176931</mtime><atime>1494614176924</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751913</id><genstamp>11089</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26678</id><type>DIRECTORY</type><name>9</name><mtime>1494614176949</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26679</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176947</mtime><atime>1494614176940</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751914</id><genstamp>11090</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26680</id><type>DIRECTORY</type><name>9</name><mtime>1494614177104</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26681</id><type>DIRECTORY</type><name>0</name><mtime>1494614176970</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26682</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176967</mtime><atime>1494614176961</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751915</id><genstamp>11091</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26683</id><type>DIRECTORY</type><name>1</name><mtime>1494614176984</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26684</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176982</mtime><atime>1494614176976</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751916</id><genstamp>11092</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26685</id><type>DIRECTORY</type><name>2</name><mtime>1494614176999</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26686</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614176998</mtime><atime>1494614176991</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751917</id><genstamp>11093</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26687</id><type>DIRECTORY</type><name>3</name><mtime>1494614177016</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26688</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614177014</mtime><atime>1494614177007</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751918</id><genstamp>11094</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26689</id><type>DIRECTORY</type><name>4</name><mtime>1494614177031</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26690</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614177029</mtime><atime>1494614177022</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751919</id><genstamp>11095</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26691</id><type>DIRECTORY</type><name>5</name><mtime>1494614177046</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26692</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614177044</mtime><atime>1494614177037</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751920</id><genstamp>11096</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26693</id><type>DIRECTORY</type><name>6</name><mtime>1494614177063</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26694</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614177061</mtime><atime>1494614177054</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751921</id><genstamp>11097</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26695</id><type>DIRECTORY</type><name>7</name><mtime>1494614177086</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26696</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614177084</mtime><atime>1494614177077</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751922</id><genstamp>11098</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26697</id><type>DIRECTORY</type><name>8</name><mtime>1494614177101</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26698</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614177099</mtime><atime>1494614177093</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751923</id><genstamp>11099</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
<inode><id>26699</id><type>DIRECTORY</type><name>9</name><mtime>1494614177117</mtime><permission>hdfs:hdfs:rwxr-x---</permission><nsquota>-1</nsquota><dsquota>-1</dsquota></inode>
<inode><id>26700</id><type>FILE</type><name>foo</name><replication>1</replication><mtime>1494614177115</mtime><atime>1494614177108</atime><perferredBlockSize>536870912</perferredBlockSize><permission>hdfs:hdfs:rw-r-----</permission><blocks><block><id>1073751924</id><genstamp>11100</genstamp><numBytes>4</numBytes></block>
</blocks>
</inode>
</INodeSection>
<INodeReferenceSection></INodeReferenceSection><SnapshotSection><snapshotCounter>0</snapshotCounter></SnapshotSection>
<INodeDirectorySection><directory><parent>16385</parent><inode>16387</inode><inode>16386</inode><inode>16388</inode></directory>
<directory><parent>16386</parent><inode>16390</inode></directory>
<directory><parent>16388</parent><inode>16389</inode></directory>
<directory><parent>16390</parent><inode>26491</inode><inode>26512</inode><inode>26533</inode><inode>26554</inode><inode>26575</inode><inode>26596</inode><inode>26617</inode><inode>26638</inode><inode>26659</inode><inode>26680</inode></directory>
<directory><parent>26491</parent><inode>26492</inode><inode>26494</inode><inode>26496</inode><inode>26498</inode><inode>26500</inode><inode>26502</inode><inode>26504</inode><inode>26506</inode><inode>26508</inode><inode>26510</inode></directory>
<directory><parent>26492</parent><inode>26493</inode></directory>
<directory><parent>26494</parent><inode>26495</inode></directory>
<directory><parent>26496</parent><inode>26497</inode></directory>
<directory><parent>26498</parent><inode>26499</inode></directory>
<directory><parent>26500</parent><inode>26501</inode></directory>
<directory><parent>26502</parent><inode>26503</inode></directory>
<directory><parent>26504</parent><inode>26505</inode></directory>
<directory><parent>26506</parent><inode>26507</inode></directory>
<directory><parent>26508</parent><inode>26509</inode></directory>
<directory><parent>26510</parent><inode>26511</inode></directory>
<directory><parent>26512</parent><inode>26513</inode><inode>26515</inode><inode>26517</inode><inode>26519</inode><inode>26521</inode><inode>26523</inode><inode>26525</inode><inode>26527</inode><inode>26529</inode><inode>26531</inode></directory>
<directory><parent>26513</parent><inode>26514</inode></directory>
<directory><parent>26515</parent><inode>26516</inode></directory>
<directory><parent>26517</parent><inode>26518</inode></directory>
<directory><parent>26519</parent><inode>26520</inode></directory>
<directory><parent>26521</parent><inode>26522</inode></directory>
<directory><parent>26523</parent><inode>26524</inode></directory>
<directory><parent>26525</parent><inode>26526</inode></directory>
<directory><parent>26527</parent><inode>26528</inode></directory>
<directory><parent>26529</parent><inode>26530</inode></directory>
<directory><parent>26531</parent><inode>26532</inode></directory>
<directory><parent>26533</parent><inode>26534</inode><inode>26536</inode><inode>26538</inode><inode>26540</inode><inode>26542</inode><inode>26544</inode><inode>26546</inode><inode>26548</inode><inode>26550</inode><inode>26552</inode></directory>
<directory><parent>26534</parent><inode>26535</inode></directory>
<directory><parent>26536</parent><inode>26537</inode></directory>
<directory><parent>26538</parent><inode>26539</inode></directory>
<directory><parent>26540</parent><inode>26541</inode></directory>
<directory><parent>26542</parent><inode>26543</inode></directory>
<directory><parent>26544</parent><inode>26545</inode></directory>
<directory><parent>26546</parent><inode>26547</inode></directory>
<directory><parent>26548</parent><inode>26549</inode></directory>
<directory><parent>26550</parent><inode>26551</inode></directory>
<directory><parent>26552</parent><inode>26553</inode></directory>
<directory><parent>26554</parent><inode>26555</inode><inode>26557</inode><inode>26559</inode><inode>26561</inode><inode>26563</inode><inode>26565</inode><inode>26567</inode><inode>26569</inode><inode>26571</inode><inode>26573</inode></directory>
<directory><parent>26555</parent><inode>26556</inode></directory>
<directory><parent>26557</parent><inode>26558</inode></directory>
<directory><parent>26559</parent><inode>26560</inode></directory>
<directory><parent>26561</parent><inode>26562</inode></directory>
<directory><parent>26563</parent><inode>26564</inode></directory>
<directory><parent>26565</parent><inode>26566</inode></directory>
<directory><parent>26567</parent><inode>26568</inode></directory>
<directory><parent>26569</parent><inode>26570</inode></directory>
<directory><parent>26571</parent><inode>26572</inode></directory>
<directory><parent>26573</parent><inode>26574</inode></directory>
<directory><parent>26575</parent><inode>26576</inode><inode>26578</inode><inode>26580</inode><inode>26582</inode><inode>26584</inode><inode>26586</inode><inode>26588</inode><inode>26590</inode><inode>26592</inode><inode>26594</inode></directory>
<directory><parent>26576</parent><inode>26577</inode></directory>
<directory><parent>26578</parent><inode>26579</inode></directory>
<directory><parent>26580</parent><inode>26581</inode></directory>
<directory><parent>26582</parent><inode>26583</inode></directory>
<directory><parent>26584</parent><inode>26585</inode></directory>
<directory><parent>26586</parent><inode>26587</inode></directory>
<directory><parent>26588</parent><inode>26589</inode></directory>
<directory><parent>26590</parent><inode>26591</inode></directory>
<directory><parent>26592</parent><inode>26593</inode></directory>
<directory><parent>26594</parent><inode>26595</inode></directory>
<directory><parent>26596</parent><inode>26597</inode><inode>26599</inode><inode>26601</inode><inode>26603</inode><inode>26605</inode><inode>26607</inode><inode>26609</inode><inode>26611</inode><inode>26613</inode><inode>26615</inode></directory>
<directory><parent>26597</parent><inode>26598</inode></directory>
<directory><parent>26599</parent><inode>26600</inode></directory>
<directory><parent>26601</parent><inode>26602</inode></directory>
<directory><parent>26603</parent><inode>26604</inode></directory>
<directory><parent>26605</parent><inode>26606</inode></directory>
<directory><parent>26607</parent><inode>26608</inode></directory>
<directory><parent>26609</parent><inode>26610</inode></directory>
<directory><parent>26611</parent><inode>26612</inode></directory>
<directory><parent>26613</parent><inode>26614</inode></directory>
<directory><parent>26615</parent><inode>26616</inode></directory>
<directory><parent>26617</parent><inode>26618</inode><inode>26620</inode><inode>26622</inode><inode>26624</inode><inode>26626</inode><inode>26628</inode><inode>26630</inode><inode>26632</inode><inode>26634</inode><inode>26636</inode></directory>
<directory><parent>26618</parent><inode>26619</inode></directory>
<directory><parent>26620</parent><inode>26621</inode></directory>
<directory><parent>26622</parent><inode>26623</inode></directory>
<directory><parent>26624</parent><inode>26625</inode></directory>
<directory><parent>26626</parent><inode>26627</inode></directory>
<directory><parent>26628</parent><inode>26629</inode></directory>
<directory><parent>26630</parent><inode>26631</inode></directory>
<directory><parent>26632</parent><inode>26633</inode></directory>
<directory><parent>26634</parent><inode>26635</inode></directory>
<directory><parent>26636</parent><inode>26637</inode></directory>
<directory><parent>26638</parent><inode>26639</inode><inode>26641</inode><inode>26643</inode><inode>26645</inode><inode>26647</inode><inode>26649</inode><inode>26651</inode><inode>26653</inode><inode>26655</inode><inode>26657</inode></directory>
<directory><parent>26639</parent><inode>26640</inode></directory>
<directory><parent>26641</parent><inode>26642</inode></directory>
<directory><parent>26643</parent><inode>26644</inode></directory>
<directory><parent>26645</parent><inode>26646</inode></directory>
<directory><parent>26647</parent><inode>26648</inode></directory>
<directory><parent>26649</parent><inode>26650</inode></directory>
<directory><parent>26651</parent><inode>26652</inode></directory>
<directory><parent>26653</parent><inode>26654</inode></directory>
<directory><parent>26655</parent><inode>26656</inode></directory>
<directory><parent>26657</parent><inode>26658</inode></directory>
<directory><parent>26659</parent><inode>26660</inode><inode>26662</inode><inode>26664</inode><inode>26666</inode><inode>26668</inode><inode>26670</inode><inode>26672</inode><inode>26674</inode><inode>26676</inode><inode>26678</inode></directory>
<directory><parent>26660</parent><inode>26661</inode></directory>
<directory><parent>26662</parent><inode>26663</inode></directory>
<directory><parent>26664</parent><inode>26665</inode></directory>
<directory><parent>26666</parent><inode>26667</inode></directory>
<directory><parent>26668</parent><inode>26669</inode></directory>
<directory><parent>26670</parent><inode>26671</inode></directory>
<directory><parent>26672</parent><inode>26673</inode></directory>
<directory><parent>26674</parent><inode>26675</inode></directory>
<directory><parent>26676</parent><inode>26677</inode></directory>
<directory><parent>26678</parent><inode>26679</inode></directory>
<directory><parent>26680</parent><inode>26681</inode><inode>26683</inode><inode>26685</inode><inode>26687</inode><inode>26689</inode><inode>26691</inode><inode>26693</inode><inode>26695</inode><inode>26697</inode><inode>26699</inode></directory>
<directory><parent>26681</parent><inode>26682</inode></directory>
<directory><parent>26683</parent><inode>26684</inode></directory>
<directory><parent>26685</parent><inode>26686</inode></directory>
<directory><parent>26687</parent><inode>26688</inode></directory>
<directory><parent>26689</parent><inode>26690</inode></directory>
<directory><parent>26691</parent><inode>26692</inode></directory>
<directory><parent>26693</parent><inode>26694</inode></directory>
<directory><parent>26695</parent><inode>26696</inode></directory>
<directory><parent>26697</parent><inode>26698</inode></directory>
<directory><parent>26699</parent><inode>26700</inode></directory>
</INodeDirectorySection>
<FileUnderConstructionSection></FileUnderConstructionSection>
<SnapshotDiffSection><diff><inodeid>16385</inodeid></diff></SnapshotDiffSection>
<SecretManagerSection><currentId>0</currentId><tokenSequenceNumber>0</tokenSequenceNumber></SecretManagerSection><CacheManagerSection><nextDirectiveId>1</nextDirectiveId></CacheManagerSection>
</fsimage>

View File

@@ -0,0 +1,128 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-project-dist</artifactId>
<version>3.3.0-SNAPSHOT</version>
<relativePath>../../../hadoop-project-dist</relativePath>
</parent>
<artifactId>hadoop-dynamometer-dist</artifactId>
<version>3.3.0-SNAPSHOT</version>
<description>Apache Hadoop Dynamometer Dist</description>
<name>Apache Hadoop Dynamometer Dist</name>
<packaging>jar</packaging>
<properties>
<hadoop.component>tools</hadoop.component>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-dynamometer-blockgen</artifactId>
<scope>compile</scope>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-dynamometer-workload</artifactId>
<scope>compile</scope>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-dynamometer-infra</artifactId>
<scope>compile</scope>
<version>${project.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<configuration>
</configuration>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>dist</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-assemblies</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<executions>
<execution>
<id>dist</id>
<phase>prepare-package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<appendAssemblyId>false</appendAssemblyId>
<attach>false</attach>
<finalName>${project.artifactId}-${project.version}</finalName>
<descriptorRefs>
<descriptorRef>hadoop-dynamometer</descriptorRef>
</descriptorRefs>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/${project.artifactId}-${project.version}/share/hadoop/tools/lib</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

View File

@@ -0,0 +1,201 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-project</artifactId>
<version>3.3.0-SNAPSHOT</version>
<relativePath>../../../hadoop-project</relativePath>
</parent>
<artifactId>hadoop-dynamometer-infra</artifactId>
<version>3.3.0-SNAPSHOT</version>
<description>Apache Hadoop Dynamometer Cluster Simulator</description>
<name>Apache Hadoop Dynamometer Cluster Simulator</name>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<!-- JUnit is required by MiniDFSCluster, which is used outside of tests -->
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<type>test-jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-dynamometer-workload</artifactId>
<scope>compile</scope>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.10.19</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-tests</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-dynamometer-workload</artifactId>
<type>test-jar</type>
<scope>test</scope>
<version>${project.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<configuration>
<attach>true</attach>
</configuration>
<executions>
<execution>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<configuration>
<excludes>
<exclude>src/test/resources/blocks/dn1-a-0-r-00001</exclude>
<exclude>src/test/resources/blocks/dn0-a-0-r-00000</exclude>
<exclude>src/test/resources/blocks/dn2-a-0-r-00002</exclude>
<exclude>src/test/resources/hadoop_3_1/fsimage_0000000000000061740.md5</exclude>
<exclude>src/test/resources/hadoop_3_1/VERSION</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>dist</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-assemblies</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<executions>
<execution>
<id>dist</id>
<phase>prepare-package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<appendAssemblyId>false</appendAssemblyId>
<attach>false</attach>
<finalName>${project.artifactId}-${project.version}</finalName>
<descriptorRefs>
<descriptorRef>hadoop-dynamometer-infra</descriptorRef>
</descriptorRefs>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>deplist</id>
<phase>compile</phase>
<goals>
<goal>list</goal>
</goals>
<configuration>
<!-- referenced by a built-in command -->
<outputFile>${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-builtin.txt</outputFile>
</configuration>
</execution>
<execution>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/lib</outputDirectory>
<includeScope>runtime</includeScope>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

View File

@ -0,0 +1,53 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if [[ "$#" != 1 ]] || [[ "$1" == "-h" ]] || [[ "$1" == "--help" ]]; then
echo "Usage: ./create-slim-hadoop-tar.sh path-to-hadoop-tar"
echo " Takes path-to-hadoop-tar as a hadoop.tar.gz binary distribution"
echo " and removes portions of it that are unnecessary for dynamometer"
echo " (e.g. unrelated components like YARN)."
echo " This overwrites the original file."
echo " This is idempotent; you can safely rerun it on the same tar."
exit 1
fi
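# Example invocation (illustrative path):
#   ./create-slim-hadoop-tar.sh /tmp/hadoop-3.3.0.tar.gz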
hadoopTar="$1"
# ls output is intended for human consumption
# shellcheck disable=SC2012
echo "Slimming $hadoopTar; size before is $(ls -lh "$hadoopTar" | awk '{ print $5 }')"
hadoopTarTmp="$hadoopTar.temporary"
mkdir -p "$hadoopTarTmp"
tar xzf "$hadoopTar" -C "$hadoopTarTmp"
baseDir="$(find -H "$hadoopTarTmp" -depth 1 -type d | head -n 1)" # Should only be one subdir
hadoopShare="$baseDir/share/hadoop"
# Remove unnecessary files
rm -rf "${baseDir}/share/doc" "${hadoopShare}/mapreduce ${hadoopShare}/yarn" \
"${hadoopShare}/kms" "${hadoopShare}/tools" "${hadoopShare}/httpfs" \
"${hadoopShare}"/*/sources "${hadoopShare}"/*/jdiff
tar czf "$hadoopTarTmp.tar.gz" -C "$hadoopTarTmp" .
rm -rf "$hadoopTarTmp"
mv -f "$hadoopTarTmp.tar.gz" "$hadoopTar"
# ls output is intended for human consumption
# shellcheck disable=SC2012
echo "Finished; size after is $(ls -lh "$hadoopTar" | awk '{ print $5 }')"

View File

@ -0,0 +1,84 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script fetches the standard Hadoop metrics which the Dynamometer
# NameNode generates during its execution. Those metrics are uploaded onto
# HDFS when the Dynamometer application completes. This script downloads them
# locally and parses out the specified metric for the given time period. This
# is useful, for example, to isolate only the metrics produced during the
# workload replay portion of a job. For this, specify startTimeMs as the start
# time of the workload job (which it logs during execution) and periodMinutes
# as the period (in minutes) of the replay.
if [ $# -lt 5 ]; then
echo "Usage:"
echo "./parse-metrics.sh applicationID outputFileName startTimeMs periodMinutes metricName [ context ] [ isCounter ]"
echo "If no file namenode_metrics_{applicationID} is present in the working directory,"
echo "attempts to download one from HDFS for applicationID. Filters values"
echo "for the specified metric, during the range"
echo "(startTimeMs, startTimeMs + periodMinutes) optionally filtering on the context as well"
echo "(which is just applied as a regex search across the metric line output)"
echo "and outputs CSV pairs of (seconds_since_start_time,value)."
echo "If isCounter is true, treats the metrics as a counter and outputs per-second rate values."
exit 1
fi
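# Example invocation (application ID, timestamps, and metric name are illustrative):
#   ./parse-metrics.sh application_1234567890123_0001 files_total.csv 1546300800000 30 FilesTotal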
appId="$1"
output="$2"
start_ts="$3"
period_minutes="$4"
metric="$5"
context="$6"
is_counter="$7"
localFile="namenode_metrics_$appId"
if [ ! -f "$localFile" ]; then
remoteFile=".dynamometer/$appId/namenode_metrics"
echo "Downloading file from HDFS: $remoteFile"
if ! hdfs dfs -copyToLocal "$remoteFile" "$localFile"; then
exit 1
fi
fi
read -d '' -r awk_script <<'EOF'
BEGIN {
metric_regex="[[:space:]]"metric"=([[:digit:].E]+)";
end_ts=start_ts+(period_minutes*60*1000)
last_val=0
last_ts=start_ts
}
"true" ~ is_counter && $0 ~ metric_regex && $0 ~ context && $1 < start_ts {
match($0, metric_regex, val_arr);
last_val=val_arr[1]
last_ts=$1
}
$0 ~ metric_regex && $0 ~ context && $1 >= start_ts && $1 <= end_ts {
match($0, metric_regex, val_arr);
val=val_arr[1]
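# For counter metrics, convert the cumulative value into a per-second
# rate over the interval since the previous sample.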
if (is_counter == "true") {
tmp=val
val=val-last_val
val=val/(($1-last_ts)/1000)
last_ts=$1
last_val=tmp
}
printf("%.0f,%.6f\n", ($0-start_ts)/1000, val)
}
EOF
gawk -v metric="$metric" -v context="$context" -v start_ts="$start_ts" \
-v period_minutes="$period_minutes" -v is_counter="$is_counter" -v OFS="," "$awk_script" "$localFile" > "$output"

View File

@ -0,0 +1,41 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script simply passes its arguments along to the infrastructure / cluster
# driver after finding a hadoop command in PATH/HADOOP_COMMON_HOME/HADOOP_HOME
# (searching in that order).
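# Example invocation (flag names and paths are illustrative; run the Client
# without arguments to see the authoritative list of options):
#   ./start-dynamometer-cluster.sh -hadoop_binary_path hadoop-3.3.0.tar.gz \
#       -conf_path my-hadoop-conf -fs_image_dir hdfs:///dyno/fsimage \
#       -block_list_path hdfs:///dyno/blocks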
if type hadoop &> /dev/null; then
hadoop_cmd="hadoop"
elif type "$HADOOP_COMMON_HOME/bin/hadoop" &> /dev/null; then
hadoop_cmd="$HADOOP_COMMON_HOME/bin/hadoop"
elif type "$HADOOP_HOME/bin/hadoop" &> /dev/null; then
hadoop_cmd="$HADOOP_HOME/bin/hadoop"
else
echo "Unable to find a valid hadoop command to execute; exiting."
exit 1
fi
script_pwd="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."
for f in "${script_pwd}"/lib/*.jar; do
# Skip adding the infra JAR since it is added by the `hadoop jar` command
if [[ "$f" != *"dynamometer-infra-"* ]]; then
export HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$f"
fi
done
"$hadoop_cmd" jar "${script_pwd}"/lib/dynamometer-infra-*.jar org.apache.hadoop.tools.dynamometer.Client "$@"

View File

@ -0,0 +1,84 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if [ $# -lt 2 ] || [ "$1" == "-h" ] || [ "$1" == "--help" ]; then
echo "Usage: ./upload-fsimage.sh image-txid remote-dir [ name-dir ]"
echo " Takes the steps necessary to place all necessary components for"
echo " the fsimage with the specified transaction ID (leading 0s not"
echo " required) onto HDFS. This includes the fsimage file itself,"
echo " the MD5 hash, the VERSION file, and the XML version of the"
echo " fsimage file, which will be temporarily generated by this script"
echo " (into \$TMPDIR if specified, else /tmp)."
echo " These files will be uploaded to remote_dir (created if it does"
echo " not yet exist). This is the same directory that should be passed"
echo " to the Client as fs_image_dir."
echo " If name-dir is specified, looks for fsimage files under"
echo " \${name-dir}/current. Otherwise, looks in the current directory."
exit 1
fi
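# Example invocation (transaction ID and paths are illustrative):
#   ./upload-fsimage.sh 61740 hdfs:///dyno/fsimage /data/namenode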
image_txid="$1"
remote_dir="$2"
if [[ $# -ge 3 ]]; then
name_dir="$3/current"
else
name_dir="$(pwd)"
fi
image_file_count="$(find -H "${name_dir}" -depth 1 -name "fsimage_*$image_txid" -type f | wc -l)"
if [[ "$image_file_count" != 1 ]]; then
echo "Error; found $image_file_count matching fsimage files."
exit 1
fi
image_file="$(find -H "${name_dir}" -depth 1 -name "fsimage_*$image_txid" -type f)"
image_file_name="$(basename "${image_file}")"
echo "Using fsimage: $image_file_name"
image_file_md5="${image_file}.md5"
version_file="${name_dir}/VERSION"
if ! [[ -f "$image_file_md5" ]]; then
echo "Unable to find MD5 file for fsimage; looked at $image_file_md5"
exit 1
fi
if ! [[ -f "$version_file" ]]; then
echo "Unable to find VERSION file; looked at $version_file"
exit 1
fi
if ! tmp_dir="$(mktemp -d)"; then
echo "mktemp failed to make a temporary directory; exiting"
exit 1
fi
image_file_xml="${tmp_dir}/${image_file_name}.xml"
echo "Creating temporary XML fsimage file at $image_file_xml"
hdfs oiv -p XML -i "$image_file" -o "$image_file_xml"
echo "Created temporary XML fsimage file"
if ! hdfs dfs -mkdir -p "$remote_dir"; then
echo "Unable to create ${remote_dir}; exiting"
rm -rf "${tmp_dir}"
exit 1
fi
for file in "${image_file}" "${image_file_xml}" "${image_file_md5}" "${version_file}"; do
echo "Uploading ${file}..."
if ! hdfs dfs -copyFromLocal -f "$file" "${remote_dir}"; then
echo "Error while uploading ${file}; exiting"
rm -rf "${tmp_dir}"
exit 1
fi
done
rm -rf "${tmp_dir}"
echo "Complete!"

View File

@ -0,0 +1,316 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer;
import com.google.common.base.Preconditions;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
/**
* Options supplied to the Client which are then passed through to the
* ApplicationMaster.
*/
class AMOptions {
public static final String NAMENODE_MEMORY_MB_ARG = "namenode_memory_mb";
public static final String NAMENODE_MEMORY_MB_DEFAULT = "2048";
public static final String NAMENODE_VCORES_ARG = "namenode_vcores";
public static final String NAMENODE_VCORES_DEFAULT = "1";
public static final String NAMENODE_NODELABEL_ARG = "namenode_nodelabel";
public static final String NAMENODE_ARGS_ARG = "namenode_args";
public static final String DATANODE_MEMORY_MB_ARG = "datanode_memory_mb";
public static final String DATANODE_MEMORY_MB_DEFAULT = "2048";
public static final String DATANODE_VCORES_ARG = "datanode_vcores";
public static final String DATANODE_VCORES_DEFAULT = "1";
public static final String DATANODE_NODELABEL_ARG = "datanode_nodelabel";
public static final String DATANODE_ARGS_ARG = "datanode_args";
public static final String NAMENODE_METRICS_PERIOD_ARG =
"namenode_metrics_period";
public static final String NAMENODE_METRICS_PERIOD_DEFAULT = "60";
public static final String SHELL_ENV_ARG = "shell_env";
public static final String DATANODES_PER_CLUSTER_ARG =
"datanodes_per_cluster";
public static final String DATANODES_PER_CLUSTER_DEFAULT = "1";
public static final String DATANODE_LAUNCH_DELAY_ARG =
"datanode_launch_delay";
public static final String DATANODE_LAUNCH_DELAY_DEFAULT = "0s";
public static final String NAMENODE_NAME_DIR_ARG = "namenode_name_dir";
public static final String NAMENODE_EDITS_DIR_ARG = "namenode_edits_dir";
private final int datanodeMemoryMB;
private final int datanodeVirtualCores;
private final String datanodeArgs;
private final String datanodeNodeLabelExpression;
private final int datanodesPerCluster;
private final String datanodeLaunchDelay;
private final int namenodeMemoryMB;
private final int namenodeVirtualCores;
private final String namenodeArgs;
private final String namenodeNodeLabelExpression;
private final int namenodeMetricsPeriod;
private final String namenodeNameDir;
private final String namenodeEditsDir;
// Original shellEnv as passed in through arguments
private final Map<String, String> originalShellEnv;
// Extended shellEnv including custom environment variables
private final Map<String, String> shellEnv;
AMOptions(int datanodeMemoryMB, int datanodeVirtualCores, String datanodeArgs,
String datanodeNodeLabelExpression, int datanodesPerCluster,
String datanodeLaunchDelay, int namenodeMemoryMB,
int namenodeVirtualCores, String namenodeArgs,
String namenodeNodeLabelExpression, int namenodeMetricsPeriod,
String namenodeNameDir, String namenodeEditsDir,
Map<String, String> shellEnv) {
this.datanodeMemoryMB = datanodeMemoryMB;
this.datanodeVirtualCores = datanodeVirtualCores;
this.datanodeArgs = datanodeArgs;
this.datanodeNodeLabelExpression = datanodeNodeLabelExpression;
this.datanodesPerCluster = datanodesPerCluster;
this.datanodeLaunchDelay = datanodeLaunchDelay;
this.namenodeMemoryMB = namenodeMemoryMB;
this.namenodeVirtualCores = namenodeVirtualCores;
this.namenodeArgs = namenodeArgs;
this.namenodeNodeLabelExpression = namenodeNodeLabelExpression;
this.namenodeMetricsPeriod = namenodeMetricsPeriod;
this.namenodeNameDir = namenodeNameDir;
this.namenodeEditsDir = namenodeEditsDir;
this.originalShellEnv = shellEnv;
this.shellEnv = new HashMap<>(this.originalShellEnv);
this.shellEnv.put(DynoConstants.NN_ADDITIONAL_ARGS_ENV, this.namenodeArgs);
this.shellEnv.put(DynoConstants.DN_ADDITIONAL_ARGS_ENV, this.datanodeArgs);
this.shellEnv.put(DynoConstants.NN_FILE_METRIC_PERIOD_ENV,
String.valueOf(this.namenodeMetricsPeriod));
this.shellEnv.put(DynoConstants.NN_NAME_DIR_ENV, this.namenodeNameDir);
this.shellEnv.put(DynoConstants.NN_EDITS_DIR_ENV, this.namenodeEditsDir);
}
/**
* Verifies that arguments are valid; throws IllegalArgumentException if not.
*/
void verify(long maxMemory, int maxVcores) throws IllegalArgumentException {
Preconditions.checkArgument(
datanodeMemoryMB > 0 && datanodeMemoryMB <= maxMemory,
"datanodeMemoryMB (%s) must be between 0 and %s", datanodeMemoryMB,
maxMemory);
Preconditions.checkArgument(
datanodeVirtualCores > 0 && datanodeVirtualCores <= maxVcores,
"datanodeVirtualCores (%s) must be between 0 and %s",
datanodeVirtualCores, maxVcores);
Preconditions.checkArgument(
namenodeMemoryMB > 0 && namenodeMemoryMB <= maxMemory,
"namenodeMemoryMB (%s) must be between 0 and %s", namenodeMemoryMB,
maxMemory);
Preconditions.checkArgument(
namenodeVirtualCores > 0 && namenodeVirtualCores <= maxVcores,
"namenodeVirtualCores (%s) must be between 0 and %s",
namenodeVirtualCores, maxVcores);
Preconditions.checkArgument(datanodesPerCluster > 0,
"datanodesPerCluster (%s) must be > 0", datanodesPerCluster);
}
/**
* Same as {@link #verify(long, int)} but does not set a max.
*/
void verify() throws IllegalArgumentException {
verify(Integer.MAX_VALUE, Integer.MAX_VALUE);
}
void addToVargs(List<String> vargs) {
vargs.add("--" + DATANODE_MEMORY_MB_ARG + " " + datanodeMemoryMB);
vargs.add("--" + DATANODE_VCORES_ARG + " " + datanodeVirtualCores);
addStringValToVargs(vargs, DATANODE_ARGS_ARG, datanodeArgs);
addStringValToVargs(vargs, DATANODE_NODELABEL_ARG,
datanodeNodeLabelExpression);
vargs.add("--" + DATANODES_PER_CLUSTER_ARG + " " + datanodesPerCluster);
vargs.add("--" + DATANODE_LAUNCH_DELAY_ARG + " " + datanodeLaunchDelay);
vargs.add("--" + NAMENODE_MEMORY_MB_ARG + " " + namenodeMemoryMB);
vargs.add("--" + NAMENODE_VCORES_ARG + " " + namenodeVirtualCores);
addStringValToVargs(vargs, NAMENODE_ARGS_ARG, namenodeArgs);
addStringValToVargs(vargs, NAMENODE_NODELABEL_ARG,
namenodeNodeLabelExpression);
vargs.add("--" + NAMENODE_METRICS_PERIOD_ARG + " " + namenodeMetricsPeriod);
addStringValToVargs(vargs, NAMENODE_NAME_DIR_ARG, namenodeNameDir);
addStringValToVargs(vargs, NAMENODE_EDITS_DIR_ARG, namenodeEditsDir);
for (Map.Entry<String, String> entry : originalShellEnv.entrySet()) {
vargs.add(
"--" + SHELL_ENV_ARG + " " + entry.getKey() + "=" + entry.getValue());
}
}
private void addStringValToVargs(List<String> vargs, String optionName,
String val) {
if (!val.isEmpty()) {
vargs.add("--" + optionName + " \\\"" + val + "\\\"");
}
}
int getDataNodeMemoryMB() {
return datanodeMemoryMB;
}
int getDataNodeVirtualCores() {
return datanodeVirtualCores;
}
String getDataNodeNodeLabelExpression() {
return datanodeNodeLabelExpression;
}
int getDataNodesPerCluster() {
return datanodesPerCluster;
}
long getDataNodeLaunchDelaySec() {
// Leverage the human-readable time parsing capabilities of Configuration
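// e.g. a configured value of "30s" parses to 30 and "2m" parses to 120 (seconds)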
String tmpConfKey = "___temp_config_property___";
Configuration tmpConf = new Configuration();
tmpConf.set(tmpConfKey, datanodeLaunchDelay);
return tmpConf.getTimeDuration(tmpConfKey, 0, TimeUnit.SECONDS);
}
int getNameNodeMemoryMB() {
return namenodeMemoryMB;
}
int getNameNodeVirtualCores() {
return namenodeVirtualCores;
}
String getNameNodeNodeLabelExpression() {
return namenodeNodeLabelExpression;
}
Map<String, String> getShellEnv() {
return shellEnv;
}
/**
* Set all of the command line options relevant to this class into the passed
* {@link Options}.
*
* @param opts
* Where to set the command line options.
*/
static void setOptions(Options opts) {
opts.addOption(SHELL_ENV_ARG, true,
"Environment for shell script. Specified as env_key=env_val pairs");
opts.addOption(NAMENODE_MEMORY_MB_ARG, true,
"Amount of memory in MB to be requested to run the NN (default "
+ NAMENODE_MEMORY_MB_DEFAULT + "). "
+ "Ignored unless the NameNode is run within YARN.");
opts.addOption(NAMENODE_VCORES_ARG, true,
"Amount of virtual cores to be requested to run the NN (default "
+ NAMENODE_VCORES_DEFAULT + "). "
+ "Ignored unless the NameNode is run within YARN.");
opts.addOption(NAMENODE_ARGS_ARG, true,
"Additional arguments to add when starting the NameNode. "
+ "Ignored unless the NameNode is run within YARN.");
opts.addOption(NAMENODE_NODELABEL_ARG, true,
"The node label to specify for the container to use to "
+ "run the NameNode.");
opts.addOption(NAMENODE_METRICS_PERIOD_ARG, true,
"The period in seconds for the NameNode's metrics to be emitted to "
+ "file; if <=0, disables this functionality. Otherwise, a "
+ "metrics file will be stored in the container logs for the "
+ "NameNode (default " + NAMENODE_METRICS_PERIOD_DEFAULT + ").");
opts.addOption(NAMENODE_NAME_DIR_ARG, true,
"The directory to use for the NameNode's name data directory. "
+ "If not specified, a location within the container's working "
+ "directory will be used.");
opts.addOption(NAMENODE_EDITS_DIR_ARG, true,
"The directory to use for the NameNode's edits directory. "
+ "If not specified, a location within the container's working "
+ "directory will be used.");
opts.addOption(DATANODE_MEMORY_MB_ARG, true,
"Amount of memory in MB to be requested to run the DNs (default "
+ DATANODE_MEMORY_MB_DEFAULT + ")");
opts.addOption(DATANODE_VCORES_ARG, true,
"Amount of virtual cores to be requested to run the DNs (default "
+ DATANODE_VCORES_DEFAULT + ")");
opts.addOption(DATANODE_ARGS_ARG, true,
"Additional arguments to add when starting the DataNodes.");
opts.addOption(DATANODE_NODELABEL_ARG, true, "The node label to specify "
+ "for the container to use to run the DataNode.");
opts.addOption(DATANODES_PER_CLUSTER_ARG, true,
"How many simulated DataNodes to run within each YARN container "
+ "(default " + DATANODES_PER_CLUSTER_DEFAULT + ")");
opts.addOption(DATANODE_LAUNCH_DELAY_ARG, true,
"The period over which to launch the DataNodes; this will "
+ "be used as the maximum delay and each DataNode container will "
+ "be launched with some random delay less than this value. "
+ "Accepts human-readable time durations (e.g. 10s, 1m) (default "
+ DATANODE_LAUNCH_DELAY_DEFAULT + ")");
opts.addOption("help", false, "Print usage");
}
/**
* Initialize an {@code AMOptions} from a command line parser.
*
* @param cliParser
* Where to initialize from.
* @return A new {@code AMOptions} filled out with options from the parser.
*/
static AMOptions initFromParser(CommandLine cliParser) {
Map<String, String> originalShellEnv = new HashMap<>();
if (cliParser.hasOption(SHELL_ENV_ARG)) {
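// Each entry is expected in KEY=VALUE form; if '=' is absent, the key
// maps to an empty value.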
for (String env : cliParser.getOptionValues(SHELL_ENV_ARG)) {
String trimmed = env.trim();
int index = trimmed.indexOf('=');
if (index == -1) {
originalShellEnv.put(trimmed, "");
continue;
}
String key = trimmed.substring(0, index);
String val = "";
if (index < (trimmed.length() - 1)) {
val = trimmed.substring(index + 1);
}
originalShellEnv.put(key, val);
}
}
return new AMOptions(
Integer.parseInt(cliParser.getOptionValue(DATANODE_MEMORY_MB_ARG,
DATANODE_MEMORY_MB_DEFAULT)),
Integer.parseInt(cliParser.getOptionValue(DATANODE_VCORES_ARG,
DATANODE_VCORES_DEFAULT)),
cliParser.getOptionValue(DATANODE_ARGS_ARG, ""),
cliParser.getOptionValue(DATANODE_NODELABEL_ARG, ""),
Integer.parseInt(cliParser.getOptionValue(DATANODES_PER_CLUSTER_ARG,
DATANODES_PER_CLUSTER_DEFAULT)),
cliParser.getOptionValue(DATANODE_LAUNCH_DELAY_ARG,
DATANODE_LAUNCH_DELAY_DEFAULT),
Integer.parseInt(cliParser.getOptionValue(NAMENODE_MEMORY_MB_ARG,
NAMENODE_MEMORY_MB_DEFAULT)),
Integer.parseInt(cliParser.getOptionValue(NAMENODE_VCORES_ARG,
NAMENODE_VCORES_DEFAULT)),
cliParser.getOptionValue(NAMENODE_ARGS_ARG, ""),
cliParser.getOptionValue(NAMENODE_NODELABEL_ARG, ""),
Integer.parseInt(cliParser.getOptionValue(NAMENODE_METRICS_PERIOD_ARG,
NAMENODE_METRICS_PERIOD_DEFAULT)),
cliParser.getOptionValue(NAMENODE_NAME_DIR_ARG, ""),
cliParser.getOptionValue(NAMENODE_EDITS_DIR_ARG, ""), originalShellEnv);
}
}

View File

@ -0,0 +1,39 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.ImpersonationProvider;
/**
* An {@link ImpersonationProvider} that indiscriminately allows all users to
* proxy as any other user.
*/
public class AllowAllImpersonationProvider extends Configured
implements ImpersonationProvider {
public void init(String configurationPrefix) {
// Do nothing
}
public void authorize(UserGroupInformation user, String remoteAddress) {
// Do nothing
}
}

View File

@ -0,0 +1,885 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.primitives.Ints;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Supplier;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.URL;
import org.apache.hadoop.yarn.api.records.UpdatedContainer;
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
import org.apache.hadoop.yarn.client.api.async.NMClientAsync;
import org.apache.hadoop.yarn.client.api.async.impl.NMClientAsyncImpl;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.util.Records;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* The ApplicationMaster for Dynamometer. This will launch DataNodes in YARN
* containers. If the RPC address of a NameNode is specified, it will configure
* the DataNodes to talk to that NameNode. Else, a NameNode will be launched as
* part of this YARN application. This does not implement any retry/failure
* handling.
* TODO: Add proper retry/failure handling
*
* <p/>The AM will persist until it has run for a period of time equal to the
* timeout specified or until the application is killed.
*
* <p/>If the NameNode is launched internally, it will upload some information
* onto the remote HDFS instance (i.e., the default FileSystem) about its
* hostname and ports. This is in the location determined by the
* {@link DynoConstants#DYNAMOMETER_STORAGE_DIR} and
* {@link DynoConstants#NN_INFO_FILE_NAME} constants and is in the
* {@link Properties} file format. This is consumed by this AM as well as the
* {@link Client} to determine how to contact the NameNode.
*
* <p/>Information about the location of the DataNodes is logged by the AM.
*/
@InterfaceAudience.Public
@InterfaceStability.Unstable
public class ApplicationMaster {
private static final Logger LOG =
LoggerFactory.getLogger(ApplicationMaster.class);
private static final Random RAND = new Random();
// Configuration
private Configuration conf;
// Handle to communicate with the Resource Manager
private AMRMClientAsync<ContainerRequest> amRMClient;
// Handle to communicate with the Node Manager
private NMClientAsync nmClientAsync;
// Listen to process the response from the Node Manager
private NMCallbackHandler containerListener;
// The collection of options passed in via the Client
private AMOptions amOptions;
private List<LocalResource> blockListFiles;
private int numTotalDataNodes;
private int numTotalDataNodeContainers;
// Counter for completed datanodes (complete denotes successful or failed)
private AtomicInteger numCompletedDataNodeContainers = new AtomicInteger();
// Allocated datanode count so that we know how many datanodes the RM has
// allocated to us
private AtomicInteger numAllocatedDataNodeContainers = new AtomicInteger();
// Count of failed datanodes
private AtomicInteger numFailedDataNodeContainers = new AtomicInteger();
// True iff the application has completed and is ready for cleanup
// Once true, will never be false. This variable should not be accessed
// directly but rather through the isComplete, waitForCompletion, and
// markCompleted methods.
private boolean completed = false;
private final Object completionLock = new Object();
private ByteBuffer allTokens;
// Launch threads
private List<Thread> launchThreads = new ArrayList<>();
// True iff this AM should launch and manage a NameNode
private boolean launchNameNode;
// The service RPC address of a remote NameNode to be contacted by the
// launched DataNodes
private String namenodeServiceRpcAddress = "";
// Directory to use for remote storage (a location on the remote FS which
// can be accessed by all components)
private Path remoteStoragePath;
// The ACLs to view the launched containers
private Map<ApplicationAccessType, String> applicationAcls;
// The container the NameNode is running within
private volatile Container namenodeContainer;
// Map of the containers that the DataNodes are running within
private ConcurrentMap<ContainerId, Container> datanodeContainers =
new ConcurrentHashMap<>();
// Username of the user who launched this application.
private String launchingUser;
/**
* @param args Command line args
*/
public static void main(String[] args) {
boolean result = false;
try {
ApplicationMaster appMaster = new ApplicationMaster();
LOG.info("Initializing ApplicationMaster");
boolean doRun = appMaster.init(args);
if (!doRun) {
System.exit(0);
}
result = appMaster.run();
} catch (Throwable t) {
LOG.error("Error running ApplicationMaster", t);
System.exit(1);
}
if (result) {
LOG.info("Application Master completed successfully. exiting");
System.exit(0);
} else {
LOG.info("Application Master failed. exiting");
System.exit(2);
}
}
public ApplicationMaster() {
// Set up the configuration
conf = new YarnConfiguration();
}
/**
* Parse command line options.
*
* @param args Command line args
* @return Whether init successful and run should be invoked
*/
public boolean init(String[] args) throws ParseException {
Options opts = new Options();
AMOptions.setOptions(opts);
CommandLine cliParser = new GnuParser().parse(opts, args);
if (args.length == 0) {
printUsage(opts);
throw new IllegalArgumentException(
"No args specified for application master to initialize");
}
if (cliParser.hasOption("help")) {
printUsage(opts);
return false;
}
Map<String, String> envs = System.getenv();
remoteStoragePath = new Path(
envs.get(DynoConstants.REMOTE_STORAGE_PATH_ENV));
applicationAcls = new HashMap<>();
applicationAcls.put(ApplicationAccessType.VIEW_APP,
envs.get(DynoConstants.JOB_ACL_VIEW_ENV));
launchingUser = envs.get(Environment.USER.name());
if (envs.containsKey(DynoConstants.REMOTE_NN_RPC_ADDR_ENV)) {
launchNameNode = false;
namenodeServiceRpcAddress = envs
.get(DynoConstants.REMOTE_NN_RPC_ADDR_ENV);
} else {
launchNameNode = true;
// namenodeServiceRpcAddress will be set in run() once properties are
// available
}
ContainerId containerId =
ContainerId.fromString(envs.get(Environment.CONTAINER_ID.name()));
ApplicationAttemptId appAttemptID = containerId.getApplicationAttemptId();
LOG.info("Application master for app: appId={}, clusterTimestamp={}, "
+ "attemptId={}", appAttemptID.getApplicationId().getId(),
appAttemptID.getApplicationId().getClusterTimestamp(),
appAttemptID.getAttemptId());
amOptions = AMOptions.initFromParser(cliParser);
return true;
}
/**
* Helper function to print usage.
*
* @param opts Parsed command line options
*/
private void printUsage(Options opts) {
new HelpFormatter().printHelp("ApplicationMaster", opts);
}
/**
* Main run function for the application master.
*
* @return True if the application completed successfully; false if it exited
* unexpectedly, failed, was killed, etc.
*/
public boolean run() throws YarnException, IOException, InterruptedException {
LOG.info("Starting ApplicationMaster");
Credentials credentials = UserGroupInformation.getCurrentUser()
.getCredentials();
DataOutputBuffer dob = new DataOutputBuffer();
credentials.writeTokenStorageToStream(dob);
// Now remove the AM->RM token so that containers cannot access it.
credentials.getAllTokens().removeIf((token) ->
token.getKind().equals(AMRMTokenIdentifier.KIND_NAME));
allTokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
AMRMClientAsync.AbstractCallbackHandler allocListener =
new RMCallbackHandler();
amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
amRMClient.init(conf);
amRMClient.start();
containerListener = createNMCallbackHandler();
nmClientAsync = new NMClientAsyncImpl(containerListener);
nmClientAsync.init(conf);
nmClientAsync.start();
// Register self with ResourceManager
// This will start heartbeating to the RM
String appMasterHostname = NetUtils.getHostname();
amRMClient.registerApplicationMaster(appMasterHostname, -1, "");
// Supplier used by wait-loops to determine when they should stop waiting
Supplier<Boolean> exitCriteria = this::isComplete;
Optional<Properties> namenodeProperties = Optional.empty();
if (launchNameNode) {
ContainerRequest nnContainerRequest = setupContainerAskForRM(
amOptions.getNameNodeMemoryMB(), amOptions.getNameNodeVirtualCores(),
0, amOptions.getNameNodeNodeLabelExpression());
LOG.info("Requested NameNode ask: " + nnContainerRequest.toString());
amRMClient.addContainerRequest(nnContainerRequest);
// Wait for the NN container to make its information available on the
// shared remote file storage
Path namenodeInfoPath = new Path(remoteStoragePath,
DynoConstants.NN_INFO_FILE_NAME);
LOG.info("Waiting on availability of NameNode information at "
+ namenodeInfoPath);
namenodeProperties = DynoInfraUtils.waitForAndGetNameNodeProperties(
exitCriteria, conf, namenodeInfoPath, LOG);
if (!namenodeProperties.isPresent()) {
cleanup();
return false;
}
namenodeServiceRpcAddress = DynoInfraUtils
.getNameNodeServiceRpcAddr(namenodeProperties.get()).toString();
LOG.info("NameNode information: " + namenodeProperties.get());
LOG.info("NameNode can be reached at: " + DynoInfraUtils
.getNameNodeHdfsUri(namenodeProperties.get()).toString());
DynoInfraUtils.waitForNameNodeStartup(namenodeProperties.get(),
exitCriteria, LOG);
} else {
LOG.info("Using remote NameNode with RPC address: "
+ namenodeServiceRpcAddress);
}
blockListFiles = Collections
.synchronizedList(getDataNodeBlockListingFiles());
numTotalDataNodes = blockListFiles.size();
if (numTotalDataNodes == 0) {
LOG.error(
"No block listing files were found! Cannot run with 0 DataNodes.");
markCompleted();
return false;
}
numTotalDataNodeContainers = (int) Math.ceil(((double) numTotalDataNodes)
/ Math.max(1, amOptions.getDataNodesPerCluster()));
LOG.info("Requesting {} DataNode containers with {} MB memory, {} vcores",
numTotalDataNodeContainers, amOptions.getDataNodeMemoryMB(),
amOptions.getDataNodeVirtualCores());
for (int i = 0; i < numTotalDataNodeContainers; ++i) {
ContainerRequest datanodeAsk = setupContainerAskForRM(
amOptions.getDataNodeMemoryMB(), amOptions.getDataNodeVirtualCores(),
1, amOptions.getDataNodeNodeLabelExpression());
amRMClient.addContainerRequest(datanodeAsk);
LOG.debug("Requested datanode ask: " + datanodeAsk.toString());
}
LOG.info("Finished requesting datanode containers");
if (launchNameNode) {
DynoInfraUtils.waitForNameNodeReadiness(namenodeProperties.get(),
numTotalDataNodes, true, exitCriteria, conf, LOG);
}
waitForCompletion();
return cleanup();
}
private NMCallbackHandler createNMCallbackHandler() {
return new NMCallbackHandler();
}
/**
* Wait until the application has finished and is ready for cleanup.
*/
private void waitForCompletion() throws InterruptedException {
synchronized (completionLock) {
while (!completed) {
completionLock.wait();
}
}
}
/**
* Check completion status of the application.
*
* @return True iff it has completed.
*/
private boolean isComplete() {
synchronized (completionLock) {
return completed;
}
}
/**
* Mark that this application should begin cleaning up and exit.
*/
private void markCompleted() {
synchronized (completionLock) {
completed = true;
completionLock.notify();
}
}
/**
* @return True iff the application successfully completed
*/
private boolean cleanup() {
// Join all launched threads; needed for when we time out
// and we need to release containers
for (Thread launchThread : launchThreads) {
try {
launchThread.join(10000);
} catch (InterruptedException e) {
LOG.info("Exception thrown in thread join: " + e.getMessage());
e.printStackTrace();
}
}
// When the application completes, it should stop all running containers
LOG.info("Application completed. Stopping running containers");
nmClientAsync.stop();
// When the application completes, it should send a finish application
// signal to the RM
LOG.info("Application completed. Signalling finish to RM");
FinalApplicationStatus appStatus;
String appMessage = null;
boolean success;
if (numFailedDataNodeContainers.get() == 0
&& numCompletedDataNodeContainers.get() == numTotalDataNodes) {
appStatus = FinalApplicationStatus.SUCCEEDED;
success = true;
} else {
appStatus = FinalApplicationStatus.FAILED;
appMessage = "Diagnostics: total=" + numTotalDataNodeContainers
+ ", completed=" + numCompletedDataNodeContainers.get()
+ ", allocated=" + numAllocatedDataNodeContainers.get()
+ ", failed=" + numFailedDataNodeContainers.get();
success = false;
}
try {
amRMClient.unregisterApplicationMaster(appStatus, appMessage, null);
} catch (YarnException|IOException ex) {
LOG.error("Failed to unregister application", ex);
}
amRMClient.stop();
return success;
}
private class RMCallbackHandler
extends AMRMClientAsync.AbstractCallbackHandler {
@Override
public void onContainersCompleted(
List<ContainerStatus> completedContainers) {
LOG.info("Got response from RM for container ask, completedCnt="
+ completedContainers.size());
for (ContainerStatus containerStatus : completedContainers) {
String containerInfo = "containerID=" + containerStatus.getContainerId()
+ ", state=" + containerStatus.getState() + ", exitStatus="
+ containerStatus.getExitStatus() + ", diagnostics="
+ StringUtils.abbreviate(containerStatus.getDiagnostics(), 1000);
String component;
if (isNameNode(containerStatus.getContainerId())) {
component = "NAMENODE";
} else if (isDataNode(containerStatus.getContainerId())) {
component = "DATANODE";
} else {
LOG.error("Received container status for unknown container: "
+ containerInfo);
continue;
}
LOG.info(
"Got container status for " + component + ": " + containerInfo);
// Non-complete containers should not be here
assert (containerStatus.getState() == ContainerState.COMPLETE);
if (component.equals("NAMENODE")) {
LOG.info("NameNode container completed; marking application as done");
markCompleted();
}
// increment counters for completed/failed containers
int exitStatus = containerStatus.getExitStatus();
int completedIdx = numCompletedDataNodeContainers.incrementAndGet();
if (0 != exitStatus) {
numFailedDataNodeContainers.incrementAndGet();
} else {
LOG.info("DataNode {} completed successfully, containerId={}",
completedIdx, containerStatus.getContainerId());
}
}
if (numCompletedDataNodeContainers.get() == numTotalDataNodeContainers) {
LOG.info(
"All datanode containers completed; marking application as done");
markCompleted();
}
}
@Override
public void onContainersAllocated(List<Container> allocatedContainers) {
LOG.info("Got response from RM for container ask, allocatedCnt="
+ allocatedContainers.size());
for (Container container : allocatedContainers) {
LaunchContainerRunnable containerLauncher;
String componentType;
Resource rsrc = container.getResource();
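// The first adequately-sized container is assigned to the NameNode (when one
// is being launched and not yet assigned); subsequent adequately-sized
// containers are assigned to DataNodes until the requested count is reached.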
if (launchNameNode
&& rsrc.getMemorySize() >= amOptions.getNameNodeMemoryMB()
&& rsrc.getVirtualCores() >= amOptions.getNameNodeVirtualCores()
&& namenodeContainer == null) {
namenodeContainer = container;
componentType = "NAMENODE";
containerLauncher = new LaunchContainerRunnable(container, true);
} else if (rsrc.getMemorySize() >= amOptions.getDataNodeMemoryMB()
&& rsrc.getVirtualCores() >= amOptions.getDataNodeVirtualCores()
&& numAllocatedDataNodeContainers.get() < numTotalDataNodes) {
if (launchNameNode && namenodeContainer == null) {
LOG.error("Received a container with following resources suited "
+ "for a DataNode but no NameNode container exists: "
+ "containerMem=" + rsrc.getMemorySize() + ", containerVcores="
+ rsrc.getVirtualCores());
continue;
}
numAllocatedDataNodeContainers.getAndIncrement();
datanodeContainers.put(container.getId(), container);
componentType = "DATANODE";
containerLauncher = new LaunchContainerRunnable(container, false);
} else {
LOG.warn("Received unwanted container allocation: " + container);
nmClientAsync.stopContainerAsync(container.getId(),
container.getNodeId());
continue;
}
LOG.info("Launching " + componentType + " on a new container."
+ ", containerId=" + container.getId() + ", containerNode="
+ container.getNodeId().getHost() + ":"
+ container.getNodeId().getPort() + ", containerNodeURI="
+ container.getNodeHttpAddress() + ", containerResourceMemory="
+ rsrc.getMemorySize() + ", containerResourceVirtualCores="
+ rsrc.getVirtualCores());
Thread launchThread = new Thread(containerLauncher);
// launch and start the container on a separate thread to keep
// the main thread unblocked
// as all containers may not be allocated at one go.
launchThreads.add(launchThread);
launchThread.start();
}
}
@Override
public void onShutdownRequest() {
markCompleted();
}
@Override
public void onNodesUpdated(List<NodeReport> updatedNodes) {
LOG.info("onNodesUpdated: " + Joiner.on(",").join(updatedNodes));
}
@Override
public float getProgress() {
return 0.0f;
}
@Override
public void onError(Throwable e) {
markCompleted();
amRMClient.stop();
}
@Override
public void onContainersUpdated(List<UpdatedContainer> containers) {
LOG.info("onContainersUpdated: " + Joiner.on(",").join(containers));
}
}
private class NMCallbackHandler
extends NMClientAsync.AbstractCallbackHandler {
@Override
public void onContainerStopped(ContainerId containerId) {
if (isNameNode(containerId)) {
LOG.info("NameNode container stopped: " + containerId);
namenodeContainer = null;
markCompleted();
} else if (isDataNode(containerId)) {
LOG.debug("DataNode container stopped: " + containerId);
datanodeContainers.remove(containerId);
} else {
LOG.error(
"onContainerStopped received unknown container ID: " + containerId);
}
}
@Override
public void onContainerStatusReceived(ContainerId containerId,
ContainerStatus containerStatus) {
if (LOG.isDebugEnabled()) {
LOG.debug("Container Status: id=" + containerId + ", status="
+ containerStatus);
}
}
@Override
public void onContainerStarted(ContainerId containerId,
Map<String, ByteBuffer> allServiceResponse) {
if (isNameNode(containerId)) {
LOG.info("NameNode container started at ID " + containerId);
} else if (isDataNode(containerId)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Succeeded to start DataNode Container " + containerId);
}
nmClientAsync.getContainerStatusAsync(containerId,
datanodeContainers.get(containerId).getNodeId());
} else {
LOG.error(
"onContainerStarted received unknown container ID: " + containerId);
}
}
@Override
public void onStartContainerError(ContainerId containerId, Throwable t) {
if (isNameNode(containerId)) {
LOG.error("Failed to start namenode container ID " + containerId, t);
namenodeContainer = null;
markCompleted();
} else if (isDataNode(containerId)) {
LOG.error("Failed to start DataNode Container " + containerId);
datanodeContainers.remove(containerId);
numCompletedDataNodeContainers.incrementAndGet();
numFailedDataNodeContainers.incrementAndGet();
} else {
LOG.error("onStartContainerError received unknown container ID: "
+ containerId);
}
}
@Override
public void onGetContainerStatusError(ContainerId containerId,
Throwable t) {
LOG.error("Failed to query the status of Container " + containerId);
}
@Override
public void onStopContainerError(ContainerId containerId, Throwable t) {
if (isNameNode(containerId)) {
LOG.error("Failed to stop NameNode container ID " + containerId);
namenodeContainer = null;
} else if (isDataNode(containerId)) {
LOG.error("Failed to stop DataNode Container " + containerId);
datanodeContainers.remove(containerId);
} else {
LOG.error("onStopContainerError received unknown containerID: "
+ containerId);
}
}
@Override
@Deprecated
public void onContainerResourceIncreased(ContainerId containerId,
Resource resource) {
LOG.info("onContainerResourceIncreased: {}, {}", containerId, resource);
}
@Override
public void onContainerResourceUpdated(ContainerId containerId,
Resource resource) {
LOG.info("onContainerResourceUpdated: {}, {}", containerId, resource);
}
@Override
@Deprecated
public void onIncreaseContainerResourceError(ContainerId containerId,
Throwable t) {
LOG.info("onIncreaseContainerResourceError: {}", containerId, t);
}
@Override
public void onUpdateContainerResourceError(ContainerId containerId,
Throwable t) {
LOG.info("onUpdateContainerResourceError: {}", containerId, t);
}
}
/**
* Thread to connect to the {@link ContainerManagementProtocol} and launch the
* container that will execute the NameNode or DataNode start script.
*/
private class LaunchContainerRunnable implements Runnable {
// Allocated container
private Container container;
private boolean isNameNodeLauncher;
/**
* @param lcontainer Allocated container
* @param isNameNode True iff this should launch a NameNode
*/
LaunchContainerRunnable(Container lcontainer, boolean isNameNode) {
this.container = lcontainer;
this.isNameNodeLauncher = isNameNode;
}
/**
* Get the map of local resources to be used for launching this container.
*/
private Map<String, LocalResource> getLocalResources() {
Map<String, LocalResource> localResources = new HashMap<>();
Map<String, String> envs = System.getenv();
addAsLocalResourceFromEnv(DynoConstants.CONF_ZIP, localResources, envs);
addAsLocalResourceFromEnv(DynoConstants.START_SCRIPT, localResources,
envs);
addAsLocalResourceFromEnv(DynoConstants.HADOOP_BINARY, localResources,
envs);
addAsLocalResourceFromEnv(DynoConstants.VERSION, localResources, envs);
addAsLocalResourceFromEnv(DynoConstants.DYNO_DEPENDENCIES, localResources,
envs);
if (isNameNodeLauncher) {
addAsLocalResourceFromEnv(DynoConstants.FS_IMAGE, localResources, envs);
addAsLocalResourceFromEnv(DynoConstants.FS_IMAGE_MD5, localResources,
envs);
} else {
int blockFilesToLocalize = Math.max(1,
amOptions.getDataNodesPerCluster());
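// Consume up to that many block listing files from the shared list; each
// localized file corresponds to one simulated DataNode within this container.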
for (int i = 0; i < blockFilesToLocalize; i++) {
try {
localResources.put(
DynoConstants.BLOCK_LIST_RESOURCE_PATH_PREFIX + i,
blockListFiles.remove(0));
} catch (IndexOutOfBoundsException e) {
break;
}
}
}
return localResources;
}
/**
* Connects to the CM, sets up the container launch context for the start
* script, and eventually dispatches the container start request to the CM.
*/
@Override
public void run() {
LOG.info("Setting up container launch context for containerid="
+ container.getId() + ", isNameNode=" + isNameNodeLauncher);
ContainerLaunchContext ctx = Records
.newRecord(ContainerLaunchContext.class);
// Set the environment
ctx.setEnvironment(amOptions.getShellEnv());
ctx.setApplicationACLs(applicationAcls);
try {
ctx.setLocalResources(getLocalResources());
ctx.setCommands(getContainerStartCommand());
} catch (IOException e) {
LOG.error("Error while configuring container!", e);
return;
}
// Set up tokens for the container
ctx.setTokens(allTokens.duplicate());
nmClientAsync.startContainerAsync(container, ctx);
LOG.info("Starting {}; track at: http://{}/node/containerlogs/{}/{}/",
isNameNodeLauncher ? "NAMENODE" : "DATANODE",
container.getNodeHttpAddress(), container.getId(), launchingUser);
}
/**
* Return the command used to start this container.
*/
private List<String> getContainerStartCommand() throws IOException {
// Set the necessary command to execute on the allocated container
List<String> vargs = new ArrayList<>();
// Set executable command
vargs.add("./" + DynoConstants.START_SCRIPT.getResourcePath());
String component = isNameNodeLauncher ? "namenode" : "datanode";
vargs.add(component);
if (isNameNodeLauncher) {
vargs.add(remoteStoragePath.getFileSystem(conf)
.makeQualified(remoteStoragePath).toString());
} else {
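// DataNodes receive the NameNode's service RPC address and a random startup
// delay (in seconds) bounded above by the configured launch delay.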
vargs.add(namenodeServiceRpcAddress);
vargs.add(String.valueOf(amOptions.getDataNodeLaunchDelaySec() < 1 ? 0
: RAND.nextInt(
Ints.checkedCast(amOptions.getDataNodeLaunchDelaySec()))));
}
// Add log redirect params
vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout");
vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr");
LOG.info("Completed setting up command for " + component + ": " + vargs);
return Lists.newArrayList(Joiner.on(" ").join(vargs));
}
/**
* Add the given resource into the map of resources, using information from
* the supplied environment variables.
*
* @param resource The resource to add.
* @param localResources Map of local resources to insert into.
* @param env Map of environment variables.
*/
public void addAsLocalResourceFromEnv(DynoResource resource,
Map<String, LocalResource> localResources, Map<String, String> env) {
LOG.debug("Adding resource to localResources: " + resource);
String resourcePath = resource.getResourcePath();
if (resourcePath == null) {
// Default to using the file name in the path
resourcePath = resource.getPath(env).getName();
}
localResources.put(resourcePath,
LocalResource.newInstance(URL.fromPath(resource.getPath(env)),
resource.getType(), LocalResourceVisibility.APPLICATION,
resource.getLength(env), resource.getTimestamp(env)));
}
}
private List<LocalResource> getDataNodeBlockListingFiles()
throws IOException {
Path blockListDirPath = new Path(
System.getenv().get(DynoConstants.BLOCK_LIST_PATH_ENV));
LOG.info("Looking for block listing files in " + blockListDirPath);
FileSystem blockZipFS = blockListDirPath.getFileSystem(conf);
List<LocalResource> files = new LinkedList<>();
for (FileStatus stat : blockZipFS.listStatus(blockListDirPath,
DynoConstants.BLOCK_LIST_FILE_FILTER)) {
LocalResource blockListResource = LocalResource.newInstance(
URL.fromPath(stat.getPath()),
LocalResourceType.FILE, LocalResourceVisibility.APPLICATION,
stat.getLen(), stat.getModificationTime());
files.add(blockListResource);
}
return files;
}
/**
* Return true iff {@code containerId} represents the NameNode container.
*/
private boolean isNameNode(ContainerId containerId) {
return namenodeContainer != null
&& namenodeContainer.getId().equals(containerId);
}
/**
* Return true iff {@code containerId} represents a DataNode container.
*/
private boolean isDataNode(ContainerId containerId) {
return datanodeContainers.containsKey(containerId);
}
/**
* Setup the request that will be sent to the RM for the container ask.
*
* @return the setup ResourceRequest to be sent to RM
*/
private ContainerRequest setupContainerAskForRM(int memory, int vcores,
int priority, String nodeLabel) {
Priority pri = Records.newRecord(Priority.class);
pri.setPriority(priority);
// Set up resource type requirements
// For now, memory and CPU are supported so we set memory and cpu
// requirements
Resource capability = Records.newRecord(Resource.class);
capability.setMemorySize(memory);
capability.setVirtualCores(vcores);
return new ContainerRequest(capability, null, null, pri, true, nodeLabel);
}
}

View File

@ -0,0 +1,53 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementStatus;
/**
 * A BlockPlacementPolicy which always considers itself satisfied. This avoids
 * the issue of the Dynamometer NameNode complaining about blocks being
 * under-replicated because they are not placed on distinct racks.
*/
public class BlockPlacementPolicyAlwaysSatisfied
extends BlockPlacementPolicyDefault {
private static final BlockPlacementStatusSatisfied SATISFIED =
new BlockPlacementStatusSatisfied();
private static class BlockPlacementStatusSatisfied
implements BlockPlacementStatus {
@Override
public boolean isPlacementPolicySatisfied() {
return true;
}
@Override
public String getErrorDescription() {
return null;
}
}
@Override
public BlockPlacementStatus verifyBlockPlacement(DatanodeInfo[] locs,
int numberOfReplicas) {
return SATISFIED;
}
}

View File

@ -0,0 +1,130 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer;
import java.util.regex.Pattern;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.PathFilter;
import static org.apache.hadoop.yarn.api.records.LocalResourceType.*;
/**
* Constants used in both Client and Application Master.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public final class DynoConstants {
private DynoConstants() {}
// Directory to use for remote storage (a location on the remote FS which
// can be accessed by all components). This will be the name of the directory
// within the submitter's home directory.
public static final String DYNAMOMETER_STORAGE_DIR = ".dynamometer";
/* The following used for Client -> AM communication */
// Resource for the zip file of all of the configuration for the
// DataNodes/NameNode
public static final DynoResource CONF_ZIP =
new DynoResource("CONF_ZIP", ARCHIVE, "conf");
// Resource for the Hadoop binary archive (distribution tar)
public static final DynoResource HADOOP_BINARY =
new DynoResource("HADOOP_BINARY", ARCHIVE, "hadoopBinary");
// Resource for the script used to start the DataNodes/NameNode
public static final DynoResource START_SCRIPT =
new DynoResource("START_SCRIPT", FILE, "start-component.sh");
// Resource for the file system image file used by the NameNode
public static final DynoResource FS_IMAGE =
new DynoResource("FS_IMAGE", FILE, null);
// Resource for the md5 file accompanying the file system image for the
// NameNode
public static final DynoResource FS_IMAGE_MD5 =
new DynoResource("FS_IMAGE_MD5", FILE, null);
// Resource for the VERSION file accompanying the file system image
public static final DynoResource VERSION =
new DynoResource("VERSION", FILE, "VERSION");
// Resource for the archive containing all dependencies
public static final DynoResource DYNO_DEPENDENCIES =
new DynoResource("DYNO_DEPS", ARCHIVE, "dependencies");
// Environment variable which will contain the location of the directory
// which holds all of the block files for the DataNodes
public static final String BLOCK_LIST_PATH_ENV = "BLOCK_ZIP_PATH";
// The format of the name of a single block file
public static final Pattern BLOCK_LIST_FILE_PATTERN =
Pattern.compile("dn[0-9]+-a-[0-9]+-r-[0-9]+");
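// For example (an illustrative name matching the pattern above, as would
// presumably be produced by the block generation job): "dn12-a-3-r-0"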
// The file name to use when localizing the block file on a DataNode; will be
// suffixed with an integer
public static final String BLOCK_LIST_RESOURCE_PATH_PREFIX = "blocks/block";
public static final PathFilter BLOCK_LIST_FILE_FILTER = (path) ->
DynoConstants.BLOCK_LIST_FILE_PATTERN.matcher(path.getName()).find();
// Environment variable which will contain the full path of the directory
// which should be used for remote (shared) storage
public static final String REMOTE_STORAGE_PATH_ENV = "REMOTE_STORAGE_PATH";
// Environment variable which will contain the RPC address of the NameNode
// which the DataNodes should contact, if the NameNode is not launched
// internally by this application
public static final String REMOTE_NN_RPC_ADDR_ENV = "REMOTE_NN_RPC_ADDR";
// Environment variable which will contain the view ACLs for the launched
// containers.
public static final String JOB_ACL_VIEW_ENV = "JOB_ACL_VIEW";
/* The following used for AM -> DN, NN communication */
// The name of the file which will store information about the NameNode
// (within the remote storage directory)
public static final String NN_INFO_FILE_NAME = "nn_info.prop";
// Environment variable which will contain additional arguments for the
// NameNode
public static final String NN_ADDITIONAL_ARGS_ENV = "NN_ADDITIONAL_ARGS";
// Environment variable which will contain additional arguments for the
// DataNode
public static final String DN_ADDITIONAL_ARGS_ENV = "DN_ADDITIONAL_ARGS";
// Environment variable which will contain the directory to use for the
// NameNode's name directory;
// if not specified a directory within the YARN container working directory
// will be used.
public static final String NN_NAME_DIR_ENV = "NN_NAME_DIR";
// Environment variable which will contain the directory to use for the
// NameNode's edits directory;
// if not specified a directory within the YARN container working directory
// will be used.
public static final String NN_EDITS_DIR_ENV = "NN_EDITS_DIR";
public static final String NN_FILE_METRIC_PERIOD_ENV =
"NN_FILE_METRIC_PERIOD";
/*
* These are used as the names of properties and as the environment variables
*/
// The port to use on the NameNode host when contacting for client RPCs
public static final String NN_RPC_PORT = "NN_RPC_PORT";
// The hostname of the machine running the NameNode
public static final String NN_HOSTNAME = "NN_HOSTNAME";
// The port to use on the NameNode host when contacting for service RPCs
public static final String NN_SERVICERPC_PORT = "NN_SERVICERPC_PORT";
// The port to use on the NameNode host when contacting for HTTP access
public static final String NN_HTTP_PORT = "NN_HTTP_PORT";
}

View File

@ -0,0 +1,584 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer;
import com.google.common.base.Joiner;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Supplier;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSUtilClient;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.client.BlockReportOptions;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonToken;
import org.slf4j.Logger;
/**
* A collection of utilities used by the Dynamometer infrastructure application.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public final class DynoInfraUtils {
private DynoInfraUtils() {}
public static final String DYNO_CONF_PREFIX = "dyno.";
public static final String DYNO_INFRA_PREFIX = DYNO_CONF_PREFIX + "infra.";
public static final String APACHE_DOWNLOAD_MIRROR_KEY = DYNO_CONF_PREFIX
+ "apache-mirror";
// Set a generic mirror as the default.
public static final String APACHE_DOWNLOAD_MIRROR_DEFAULT =
"http://mirrors.ocf.berkeley.edu/apache/";
private static final String APACHE_DOWNLOAD_MIRROR_SUFFIX_FORMAT =
"hadoop/common/hadoop-%s/hadoop-%s.tar.gz";
public static final String HADOOP_TAR_FILENAME_FORMAT = "hadoop-%s.tar.gz";
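// Combining the default mirror and the suffix format above, the download URL
// for version 3.1.1 (version chosen for illustration) would be:
// http://mirrors.ocf.berkeley.edu/apache/hadoop/common/hadoop-3.1.1/hadoop-3.1.1.tar.gz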
public static final String DATANODE_LIVE_MIN_FRACTION_KEY =
DYNO_INFRA_PREFIX + "ready.datanode-min-fraction";
public static final float DATANODE_LIVE_MIN_FRACTION_DEFAULT = 0.99f;
public static final String MISSING_BLOCKS_MAX_FRACTION_KEY =
DYNO_INFRA_PREFIX + "ready.missing-blocks-max-fraction";
public static final float MISSING_BLOCKS_MAX_FRACTION_DEFAULT = 0.0001f;
public static final String UNDERREPLICATED_BLOCKS_MAX_FRACTION_KEY =
DYNO_INFRA_PREFIX + "ready.underreplicated-blocks-max-fraction";
public static final float UNDERREPLICATED_BLOCKS_MAX_FRACTION_DEFAULT = 0.01f;
// The JMX bean queries to execute for various beans.
public static final String NAMENODE_STARTUP_PROGRESS_JMX_QUERY =
"Hadoop:service=NameNode,name=StartupProgress";
public static final String FSNAMESYSTEM_JMX_QUERY =
"Hadoop:service=NameNode,name=FSNamesystem";
public static final String FSNAMESYSTEM_STATE_JMX_QUERY =
"Hadoop:service=NameNode,name=FSNamesystemState";
public static final String NAMENODE_INFO_JMX_QUERY =
"Hadoop:service=NameNode,name=NameNodeInfo";
// The JMX property names of various properties.
public static final String JMX_MISSING_BLOCKS = "MissingBlocks";
public static final String JMX_UNDER_REPLICATED_BLOCKS =
"UnderReplicatedBlocks";
public static final String JMX_BLOCKS_TOTAL = "BlocksTotal";
public static final String JMX_LIVE_NODE_COUNT = "NumLiveDataNodes";
public static final String JMX_LIVE_NODES_LIST = "LiveNodes";
/**
* If a file matching {@value HADOOP_TAR_FILENAME_FORMAT} and {@code version}
* is found in {@code destinationDir}, return its path. Otherwise, first
* download the tarball from an Apache mirror. If the
* {@value APACHE_DOWNLOAD_MIRROR_KEY} configuration or system property
* (checked in that order) is set, use that as the mirror; else use
* {@value APACHE_DOWNLOAD_MIRROR_DEFAULT}.
*
* @param version The version of Hadoop to download, like "2.7.4"
* or "3.0.0-beta1"
* @return The path to the tarball.
*/
public static File fetchHadoopTarball(File destinationDir, String version,
Configuration conf, Logger log) throws IOException {
log.info("Looking for Hadoop tarball for version: " + version);
File destinationFile = new File(destinationDir,
String.format(HADOOP_TAR_FILENAME_FORMAT, version));
if (destinationFile.exists()) {
log.info("Found tarball at: " + destinationFile.getAbsolutePath());
return destinationFile;
}
String apacheMirror = conf.get(APACHE_DOWNLOAD_MIRROR_KEY);
if (apacheMirror == null) {
apacheMirror = System.getProperty(APACHE_DOWNLOAD_MIRROR_KEY,
APACHE_DOWNLOAD_MIRROR_DEFAULT);
}
destinationDir.mkdirs();
URL downloadURL = new URL(apacheMirror + String
.format(APACHE_DOWNLOAD_MIRROR_SUFFIX_FORMAT, version, version));
log.info("Downloading tarball from: <{}> to <{}>", downloadURL,
destinationFile.getAbsolutePath());
FileUtils.copyURLToFile(downloadURL, destinationFile, 10000, 60000);
log.info("Completed downloading of Hadoop tarball");
return destinationFile;
}
/**
* Get the URI that can be used to access the launched NameNode for HDFS RPCs.
*
* @param nameNodeProperties The set of properties representing the
* information about the launched NameNode.
* @return The HDFS URI.
*/
static URI getNameNodeHdfsUri(Properties nameNodeProperties) {
return URI.create(String.format("hdfs://%s:%s/",
nameNodeProperties.getProperty(DynoConstants.NN_HOSTNAME),
nameNodeProperties.getProperty(DynoConstants.NN_RPC_PORT)));
}
/**
* Get the URI that can be used to access the launched NameNode for HDFS
* Service RPCs (i.e. from DataNodes).
*
* @param nameNodeProperties The set of properties representing the
* information about the launched NameNode.
* @return The service RPC URI.
*/
static URI getNameNodeServiceRpcAddr(Properties nameNodeProperties) {
return URI.create(String.format("hdfs://%s:%s/",
nameNodeProperties.getProperty(DynoConstants.NN_HOSTNAME),
nameNodeProperties.getProperty(DynoConstants.NN_SERVICERPC_PORT)));
}
/**
* Get the URI that can be used to access the launched NameNode's web UI, e.g.
* for JMX calls.
*
* @param nameNodeProperties The set of properties representing the
* information about the launched NameNode.
* @return The URI to the web UI.
*/
static URI getNameNodeWebUri(Properties nameNodeProperties) {
return URI.create(String.format("http://%s:%s/",
nameNodeProperties.getProperty(DynoConstants.NN_HOSTNAME),
nameNodeProperties.getProperty(DynoConstants.NN_HTTP_PORT)));
}
/**
* Get the URI that can be used to access the tracking interface for the
* NameNode, i.e. the web UI of the NodeManager hosting the NameNode
* container.
*
* @param nameNodeProperties The set of properties representing the
* information about the launched NameNode.
* @return The tracking URI.
*/
static URI getNameNodeTrackingUri(Properties nameNodeProperties)
throws IOException {
return URI.create(String.format("http://%s:%s/node/containerlogs/%s/%s/",
nameNodeProperties.getProperty(DynoConstants.NN_HOSTNAME),
nameNodeProperties.getProperty(Environment.NM_HTTP_PORT.name()),
nameNodeProperties.getProperty(Environment.CONTAINER_ID.name()),
UserGroupInformation.getCurrentUser().getShortUserName()));
}
/**
* Get the set of properties representing information about the launched
* NameNode. This method will wait for the information to be available until
* it is interrupted, or {@code shouldExit} returns true. It polls for a file
* present at {@code nameNodeInfoPath} once a second and uses that file to
* load the NameNode information.
*
* @param shouldExit Should return true iff this should stop waiting.
* @param conf The configuration.
* @param nameNodeInfoPath The path at which to expect the NameNode
* information file to be present.
* @param log Where to log information.
* @return Absent if this exited prematurely (i.e. due to {@code shouldExit}),
* else returns a set of properties representing information about the
* launched NameNode.
*/
static Optional<Properties> waitForAndGetNameNodeProperties(
Supplier<Boolean> shouldExit, Configuration conf, Path nameNodeInfoPath,
Logger log) throws IOException, InterruptedException {
while (!shouldExit.get()) {
try (FSDataInputStream nnInfoInputStream = nameNodeInfoPath
.getFileSystem(conf).open(nameNodeInfoPath)) {
Properties nameNodeProperties = new Properties();
nameNodeProperties.load(nnInfoInputStream);
return Optional.of(nameNodeProperties);
} catch (FileNotFoundException fnfe) {
log.debug("NameNode host information not yet available");
Thread.sleep(1000);
} catch (IOException ioe) {
log.warn("Unable to fetch NameNode host information; retrying", ioe);
Thread.sleep(1000);
}
}
return Optional.empty();
}
/**
* Wait for the launched NameNode to finish starting up. Continues until
* {@code shouldExit} returns true.
*
* @param nameNodeProperties The set of properties containing information
* about the NameNode.
* @param shouldExit Should return true iff this should stop waiting.
* @param log Where to log information.
*/
static void waitForNameNodeStartup(Properties nameNodeProperties,
Supplier<Boolean> shouldExit, Logger log)
throws IOException, InterruptedException {
if (shouldExit.get()) {
return;
}
log.info("Waiting for NameNode to finish starting up...");
waitForNameNodeJMXValue("Startup progress",
NAMENODE_STARTUP_PROGRESS_JMX_QUERY, "PercentComplete", 1.0, 0.01,
false, nameNodeProperties, shouldExit, log);
log.info("NameNode has started!");
}
/**
 * Wait for the launched NameNode to be ready, i.e. to have at least 99% of
 * its DataNodes registered, fewer than 0.01% of its blocks missing, and
 * fewer than 1% of its blocks under-replicated (default thresholds; all are
 * configurable). Continues until the criteria have been met or
 * {@code shouldExit} returns true.
*
* @param nameNodeProperties The set of properties containing information
* about the NameNode.
* @param numTotalDataNodes Total expected number of DataNodes to register.
* @param shouldExit Should return true iff this should stop waiting.
* @param log Where to log information.
*/
static void waitForNameNodeReadiness(final Properties nameNodeProperties,
int numTotalDataNodes, boolean triggerBlockReports,
Supplier<Boolean> shouldExit, final Configuration conf, final Logger log)
throws IOException, InterruptedException {
if (shouldExit.get()) {
return;
}
int minDataNodes = (int) (conf.getFloat(DATANODE_LIVE_MIN_FRACTION_KEY,
DATANODE_LIVE_MIN_FRACTION_DEFAULT) * numTotalDataNodes);
log.info(String.format(
"Waiting for %d DataNodes to register with the NameNode...",
minDataNodes));
waitForNameNodeJMXValue("Number of live DataNodes",
FSNAMESYSTEM_STATE_JMX_QUERY, JMX_LIVE_NODE_COUNT, minDataNodes,
numTotalDataNodes * 0.001, false, nameNodeProperties, shouldExit, log);
final int totalBlocks = Integer.parseInt(fetchNameNodeJMXValue(
nameNodeProperties, FSNAMESYSTEM_STATE_JMX_QUERY, JMX_BLOCKS_TOTAL));
final AtomicBoolean doneWaiting = new AtomicBoolean(false);
if (triggerBlockReports) {
// This will be significantly lower than the actual expected number of
// blocks because it does not take into account the replication factor.
// However, the block reports are fairly binary; either a full report has
// been received or it hasn't, so we don't mind the large underestimate
// here.
final int blockThreshold = totalBlocks / numTotalDataNodes * 2;
// The Configuration object here is based on the host cluster, which may
// have security enabled; we need to disable it to talk to the Dyno NN
conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION,
"simple");
conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
"false");
final DistributedFileSystem dfs = (DistributedFileSystem) FileSystem
.get(getNameNodeHdfsUri(nameNodeProperties), conf);
log.info("Launching thread to trigger block reports for Datanodes with <"
+ blockThreshold + " blocks reported");
Thread blockReportThread = new Thread(() -> {
// Here we count both Missing and UnderReplicated within under
// replicated
long lastUnderRepBlocks = Long.MAX_VALUE;
try {
while (true) { // this will eventually exit via an interrupt
try {
Thread.sleep(TimeUnit.MINUTES.toMillis(1));
long underRepBlocks = Long
.parseLong(fetchNameNodeJMXValue(nameNodeProperties,
FSNAMESYSTEM_JMX_QUERY, JMX_MISSING_BLOCKS))
+ Long.parseLong(fetchNameNodeJMXValue(nameNodeProperties,
FSNAMESYSTEM_STATE_JMX_QUERY,
JMX_UNDER_REPLICATED_BLOCKS));
long blockDecrease = lastUnderRepBlocks - underRepBlocks;
lastUnderRepBlocks = underRepBlocks;
if (blockDecrease < 0
|| blockDecrease > (totalBlocks * 0.001)) {
continue;
}
String liveNodeListString = fetchNameNodeJMXValue(
nameNodeProperties, NAMENODE_INFO_JMX_QUERY,
JMX_LIVE_NODES_LIST);
Set<String> datanodesToReport = parseStaleDataNodeList(
liveNodeListString, blockThreshold, log);
if (datanodesToReport.isEmpty() && doneWaiting.get()) {
log.info("BlockReportThread exiting; all DataNodes have "
+ "reported blocks");
break;
}
log.info("Queueing {} Datanodes for block report: {}",
datanodesToReport.size(),
Joiner.on(",").join(datanodesToReport));
DatanodeInfo[] datanodes = dfs.getDataNodeStats();
int cnt = 0;
for (DatanodeInfo datanode : datanodes) {
if (datanodesToReport.contains(datanode.getXferAddr(true))) {
Thread.sleep(1); // to throw an interrupt if one is found
triggerDataNodeBlockReport(conf, datanode.getIpcAddr(true));
cnt++;
Thread.sleep(1000);
}
}
if (cnt != datanodesToReport.size()) {
log.warn("Found {} Datanodes to queue block reports for but "
+ "was only able to trigger {}",
datanodesToReport.size(), cnt);
}
} catch (IOException ioe) {
log.warn("Exception encountered in block report thread", ioe);
}
}
} catch (InterruptedException ie) {
// Do nothing; just exit
}
log.info("Block reporting thread exiting");
});
blockReportThread.setDaemon(true);
blockReportThread
.setUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
blockReportThread.start();
}
float maxMissingBlocks = totalBlocks * conf.getFloat(
MISSING_BLOCKS_MAX_FRACTION_KEY, MISSING_BLOCKS_MAX_FRACTION_DEFAULT);
log.info("Waiting for MissingBlocks to fall below {}...",
maxMissingBlocks);
waitForNameNodeJMXValue("Number of missing blocks", FSNAMESYSTEM_JMX_QUERY,
JMX_MISSING_BLOCKS, maxMissingBlocks, totalBlocks * 0.0001, true,
nameNodeProperties, shouldExit, log);
float maxUnderreplicatedBlocks = totalBlocks
* conf.getFloat(UNDERREPLICATED_BLOCKS_MAX_FRACTION_KEY,
UNDERREPLICATED_BLOCKS_MAX_FRACTION_DEFAULT);
log.info("Waiting for UnderReplicatedBlocks to fall below {}...",
maxUnderreplicatedBlocks);
waitForNameNodeJMXValue("Number of under replicated blocks",
FSNAMESYSTEM_STATE_JMX_QUERY, JMX_UNDER_REPLICATED_BLOCKS,
maxUnderreplicatedBlocks, totalBlocks * 0.001, true, nameNodeProperties,
shouldExit, log);
log.info("NameNode is ready for use!");
doneWaiting.set(true);
}
/**
* Trigger a block report on a given DataNode.
*
* @param conf Configuration
* @param dataNodeTarget The target; should be like {@code <host>:<port>}
*/
private static void triggerDataNodeBlockReport(Configuration conf,
String dataNodeTarget) throws IOException {
InetSocketAddress datanodeAddr = NetUtils.createSocketAddr(dataNodeTarget);
ClientDatanodeProtocol dnProtocol = DFSUtilClient
.createClientDatanodeProtocolProxy(datanodeAddr,
UserGroupInformation.getCurrentUser(), conf,
NetUtils.getSocketFactory(conf, ClientDatanodeProtocol.class));
dnProtocol.triggerBlockReport(new BlockReportOptions.Factory().build());
}
/**
* Poll the launched NameNode's JMX for a specific value, waiting for it to
* cross some threshold. Continues until the threshold has been crossed or
* {@code shouldExit} returns true. Periodically logs the current value.
*
* @param valueName The human-readable name of the value which is being
* polled (for printing purposes only).
* @param jmxBeanQuery The JMX bean query to execute; should return a JMX
* property matching {@code jmxProperty}.
* @param jmxProperty The name of the JMX property whose value should be
* polled.
* @param threshold The threshold value to wait for the JMX property to be
* above/below.
* @param printThreshold The threshold between each log statement; controls
* how frequently the value is printed. For example,
* if this was 10, a statement would be logged every
* time the value has changed by more than 10.
* @param decreasing True iff the property's value is decreasing and this
* should wait until it is lower than threshold; else the
* value is treated as increasing and will wait until it
* is higher than threshold.
* @param nameNodeProperties The set of properties containing information
* about the NameNode.
* @param shouldExit Should return true iff this should stop waiting.
* @param log Where to log information.
*/
private static void waitForNameNodeJMXValue(String valueName,
String jmxBeanQuery, String jmxProperty, double threshold,
double printThreshold, boolean decreasing, Properties nameNodeProperties,
Supplier<Boolean> shouldExit, Logger log) throws InterruptedException {
double lastPrintedValue = decreasing ? Double.MAX_VALUE : Double.MIN_VALUE;
double value;
int retryCount = 0;
long startTime = Time.monotonicNow();
while (!shouldExit.get()) {
try {
value = Double.parseDouble(fetchNameNodeJMXValue(nameNodeProperties,
jmxBeanQuery, jmxProperty));
if ((decreasing && value <= threshold)
|| (!decreasing && value >= threshold)) {
log.info(String.format(
"%s = %.2f; %s threshold of %.2f; done waiting after %d ms.",
valueName, value, decreasing ? "below" : "above", threshold,
Time.monotonicNow() - startTime));
break;
} else if (Math.abs(value - lastPrintedValue) >= printThreshold) {
log.info(String.format("%s: %.2f", valueName, value));
lastPrintedValue = value;
}
} catch (IOException ioe) {
if (++retryCount % 20 == 0) {
log.warn("Unable to fetch {}; retried {} times / waited {} ms",
valueName, retryCount, Time.monotonicNow() - startTime, ioe);
}
}
Thread.sleep(3000);
}
}
static Set<String> parseStaleDataNodeList(String liveNodeJsonString,
final int blockThreshold, final Logger log) throws IOException {
final Set<String> dataNodesToReport = new HashSet<>();
JsonFactory fac = new JsonFactory();
JsonParser parser = fac.createJsonParser(IOUtils
.toInputStream(liveNodeJsonString, StandardCharsets.UTF_8.name()));
int objectDepth = 0;
String currentNodeAddr = null;
for (JsonToken tok = parser.nextToken(); tok != null; tok = parser
.nextToken()) {
if (tok == JsonToken.START_OBJECT) {
objectDepth++;
} else if (tok == JsonToken.END_OBJECT) {
objectDepth--;
} else if (tok == JsonToken.FIELD_NAME) {
if (objectDepth == 1) {
// This is where the Datanode identifiers are stored
currentNodeAddr = parser.getCurrentName();
} else if (objectDepth == 2) {
if (parser.getCurrentName().equals("numBlocks")) {
JsonToken valueToken = parser.nextToken();
if (valueToken != JsonToken.VALUE_NUMBER_INT
|| currentNodeAddr == null) {
throw new IOException(String.format("Malformed LiveNodes JSON; "
+ "got token = %s; currentNodeAddr = %s: %s",
valueToken, currentNodeAddr, liveNodeJsonString));
}
int numBlocks = parser.getIntValue();
if (numBlocks < blockThreshold) {
log.debug(String.format(
"Queueing Datanode <%s> for block report; numBlocks = %d",
currentNodeAddr, numBlocks));
dataNodesToReport.add(currentNodeAddr);
} else {
log.debug(String.format(
"Not queueing Datanode <%s> for block report; numBlocks = %d",
currentNodeAddr, numBlocks));
}
}
}
}
}
return dataNodesToReport;
}
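// Illustrative shape of the LiveNodes JSON consumed above (hosts, ports, and
// counts are made up); only the top-level keys (DataNode transfer addresses)
// and the nested "numBlocks" fields are read:
//   {"dn1.example.com:9866": {"numBlocks": 12034, ...},
//    "dn2.example.com:9866": {"numBlocks": 0, ...}}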
/**
* Fetch a value from the launched NameNode's JMX.
*
* @param nameNodeProperties The set of properties containing information
* about the NameNode.
* @param jmxBeanQuery The JMX bean query to execute; should return a
* JMX property matching {@code jmxProperty}.
* @param property The name of the JMX property whose value should be polled.
* @return The value associated with the property.
*/
static String fetchNameNodeJMXValue(Properties nameNodeProperties,
String jmxBeanQuery, String property) throws IOException {
URI nnWebUri = getNameNodeWebUri(nameNodeProperties);
URL queryURL;
try {
queryURL = new URL(nnWebUri.getScheme(), nnWebUri.getHost(),
nnWebUri.getPort(), "/jmx?qry=" + jmxBeanQuery);
} catch (MalformedURLException e) {
throw new IllegalArgumentException("Invalid JMX query: \"" + jmxBeanQuery
+ "\" against " + "NameNode URI: " + nnWebUri);
}
HttpURLConnection conn = (HttpURLConnection) queryURL.openConnection();
if (conn.getResponseCode() != 200) {
throw new IOException(
"Unable to retrieve JMX: " + conn.getResponseMessage());
}
InputStream in = conn.getInputStream();
JsonFactory fac = new JsonFactory();
JsonParser parser = fac.createJsonParser(in);
if (parser.nextToken() != JsonToken.START_OBJECT
|| parser.nextToken() != JsonToken.FIELD_NAME
|| !parser.getCurrentName().equals("beans")
|| parser.nextToken() != JsonToken.START_ARRAY
|| parser.nextToken() != JsonToken.START_OBJECT) {
throw new IOException(
"Unexpected format of JMX JSON response for: " + jmxBeanQuery);
}
int objectDepth = 1;
String ret = null;
while (objectDepth > 0) {
JsonToken tok = parser.nextToken();
if (tok == JsonToken.START_OBJECT) {
objectDepth++;
} else if (tok == JsonToken.END_OBJECT) {
objectDepth--;
} else if (tok == JsonToken.FIELD_NAME) {
if (parser.getCurrentName().equals(property)) {
parser.nextToken();
ret = parser.getText();
break;
}
}
}
parser.close();
in.close();
conn.disconnect();
if (ret == null) {
throw new IOException(
"Property " + property + " not found within " + jmxBeanQuery);
} else {
return ret;
}
}
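// Illustrative shape of the /jmx response parsed by fetchNameNodeJMXValue
// above (the bean and values shown are hypothetical):
//   {"beans": [{"name": "Hadoop:service=NameNode,name=FSNamesystemState",
//               "NumLiveDataNodes": 42, "BlocksTotal": 1000000, ...}]}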
}

View File

@ -0,0 +1,72 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
class DynoResource {
private final String name;
private final LocalResourceType type;
private final String resourcePath;
DynoResource(String name, LocalResourceType type, String resourcePath) {
this.name = name;
this.type = type;
this.resourcePath = resourcePath;
}
public Path getPath(Map<String, String> env) {
return new Path(env.get(getLocationEnvVar()));
}
public long getTimestamp(Map<String, String> env) {
return Long.parseLong(env.get(getTimestampEnvVar()));
}
public long getLength(Map<String, String> env) {
return Long.parseLong(env.get(getLengthEnvVar()));
}
public String getLocationEnvVar() {
return name + "_LOCATION";
}
public String getTimestampEnvVar() {
return name + "_TIMESTAMP";
}
public String getLengthEnvVar() {
return name + "_LENGTH";
}
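// As an illustration of the convention above, a resource named "FS_IMAGE" is
// described by the environment variables FS_IMAGE_LOCATION,
// FS_IMAGE_TIMESTAMP, and FS_IMAGE_LENGTH.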
public LocalResourceType getType() {
return type;
}
public String getResourcePath() {
return resourcePath;
}
public String toString() {
return name;
}
}

View File

@ -0,0 +1,182 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Time;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* Starts up a number of DataNodes within the same JVM. These DataNodes all use
* {@link org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset}, so they do
* not store any actual data, and do not persist anything to disk; they maintain
* all metadata in memory. This is useful for testing and simulation purposes.
* <p>
* The DataNodes will attempt to connect to a NameNode defined by the default
* FileSystem. There will be one DataNode started for each block list file
* passed as an argument. Each of these files should contain a list of blocks
* that the corresponding DataNode should contain, as specified by a triplet of
 * block ID, generation stamp, and block size. Each line of the file is one
* block, in the format:
* <p>
* {@code blockID,blockGenStamp,blockSize}
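 * (for example, with made-up values, a single line might read
 * {@code 1073741825,1001,134217728}: block ID 1073741825, generation stamp
 * 1001, and a 128 MiB block)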
* <p>
* This class is loosely based off of
* {@link org.apache.hadoop.hdfs.DataNodeCluster}.
*/
public class SimulatedDataNodes extends Configured implements Tool {
// Set this arbitrarily large (200TB) since we don't care about storage
// capacity
private static final long STORAGE_CAPACITY = 100 * 2L << 40;
private static final String USAGE = "Usage: "
+ "org.apache.hadoop.tools.dynamometer.SimulatedDataNodes "
+ "bpid blockListFile1 [ blockListFileN ... ]\n"
+ " bpid should be the ID of the block pool to which these DataNodes "
+ "belong.\n"
+ " Each blockListFile specified should contain a list of blocks to "
+ "be served by one DataNode.\n"
+ " See the Javadoc of this class for more detail.";
static void printUsageExit(String err) {
System.out.println(err);
System.out.println(USAGE);
System.exit(1);
}
public static void main(String[] args) throws Exception {
SimulatedDataNodes datanodes = new SimulatedDataNodes();
ToolRunner.run(new HdfsConfiguration(), datanodes, args);
}
public int run(String[] args) throws Exception {
if (args.length < 2) {
printUsageExit("Not enough arguments");
}
String bpid = args[0];
List<Path> blockListFiles = new ArrayList<>();
for (int i = 1; i < args.length; i++) {
blockListFiles.add(new Path(args[i]));
}
URI defaultFS = FileSystem.getDefaultUri(getConf());
if (!HdfsConstants.HDFS_URI_SCHEME.equals(defaultFS.getScheme())) {
printUsageExit(
"Must specify an HDFS-based default FS! Got <" + defaultFS + ">");
}
String nameNodeAdr = defaultFS.getAuthority();
if (nameNodeAdr == null) {
printUsageExit("No NameNode address and port in config");
}
System.out.println("DataNodes will connect to NameNode at " + nameNodeAdr);
System.setProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA,
DataNode.getStorageLocations(getConf()).get(0).getUri().getPath());
SimulatedFSDataset.setFactory(getConf());
getConf().setLong(SimulatedFSDataset.CONFIG_PROPERTY_CAPACITY,
STORAGE_CAPACITY);
UserGroupInformation.setConfiguration(getConf());
MiniDFSCluster mc = new MiniDFSCluster();
try {
mc.formatDataNodeDirs();
} catch (IOException e) {
System.out.println("Error formatting DataNode dirs: " + e);
System.exit(1);
}
try {
System.out.println("Found " + blockListFiles.size()
+ " block listing files; launching DataNodes accordingly.");
mc.startDataNodes(getConf(), blockListFiles.size(), null, false,
StartupOption.REGULAR, null, null, null, null, false, true, true,
null);
long startTime = Time.monotonicNow();
System.out.println("Waiting for DataNodes to connect to NameNode and "
+ "init storage directories.");
Set<DataNode> datanodesWithoutFSDataset = new HashSet<>(
mc.getDataNodes());
while (!datanodesWithoutFSDataset.isEmpty()) {
datanodesWithoutFSDataset
.removeIf((dn) -> DataNodeTestUtils.getFSDataset(dn) != null);
Thread.sleep(100);
}
System.out.println("Waited " + (Time.monotonicNow() - startTime)
+ " ms for DataNode FSDatasets to be ready");
for (int dnIndex = 0; dnIndex < blockListFiles.size(); dnIndex++) {
Path blockListFile = blockListFiles.get(dnIndex);
try (FSDataInputStream fsdis = blockListFile.getFileSystem(getConf())
.open(blockListFile)) {
BufferedReader reader = new BufferedReader(
new InputStreamReader(fsdis));
List<Block> blockList = new ArrayList<>();
int cnt = 0;
for (String line = reader.readLine(); line != null; line = reader
.readLine()) {
// Format of the listing files is blockID,blockGenStamp,blockSize
String[] blockInfo = line.split(",");
blockList.add(new Block(Long.parseLong(blockInfo[0]),
Long.parseLong(blockInfo[2]), Long.parseLong(blockInfo[1])));
cnt++;
}
try {
mc.injectBlocks(dnIndex, blockList, bpid);
} catch (IOException ioe) {
System.out.printf("Error injecting blocks into DataNode %d for "
+ "block pool %s: %s%n", dnIndex, bpid,
ExceptionUtils.getStackTrace(ioe));
}
System.out.printf(
"Injected %d blocks into DataNode %d for block pool %s%n",
cnt, dnIndex, bpid);
}
}
} catch (IOException e) {
System.out.println(
"Error creating DataNodes: " + ExceptionUtils.getStackTrace(e));
return 1;
}
return 0;
}
}

View File

@ -0,0 +1,29 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* The Dynamometer tool is used to stress test the HDFS NameNode by setting
 * up a realistic environment consisting of fake DataNodes and a real NameNode
* with a real FS namesystem and near-real user workload (RPCs). Please see
* the documentation for more detail.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
package org.apache.hadoop.tools.dynamometer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

View File

@ -0,0 +1,344 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Script for launching Dynamometer components within YARN containers.
# USAGE:
# ./start-component.sh namenode hdfs_storage
# OR
# ./start-component.sh datanode nn_servicerpc_address sleep_time_sec
# First parameter should be component being launched, either `datanode` or `namenode`
# If component is namenode, hdfs_storage is expected to point to a location to
# write out shared files such as the file containing the information about
# which ports the NN started on (at nn_info.prop) and the namenode's metrics
# (at namenode_metrics)
# If component is datanode, nn_servicerpc_address is expected to point to the
# servicerpc address of the namenode. sleep_time_sec is the amount of time that
# should be allowed to elapse before launching anything. The
# `org.apache.hadoop.tools.dynamometer.SimulatedDataNodes` class will be used to start multiple
# DataNodes within the same JVM, and they will store their block files in memory.
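# Example invocations (all hostnames, ports, and paths below are illustrative
# only):
#   ./start-component.sh namenode hdfs://host.example.com:8020/user/dyno/.dynamometer/app-storage
#   ./start-component.sh datanode hdfs://host.example.com:9020 60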
component="$1"
if [[ "$component" != "datanode" && "$component" != "namenode" ]]; then
echo "Unknown component type: '${component}'"
exit 1
fi
if [[ "$component" = "namenode" ]]; then
if [[ $# -lt 2 ]]; then
echo "Not enough arguments for NameNode"
exit 1
fi
hdfsStoragePath="$2"
else
if [[ $# -lt 3 ]]; then
echo "Not enough arguments for DataNode"
exit 1
fi
nnServiceRpcAddress="$2"
launchDelaySec="$3"
fi
containerID=${CONTAINER_ID##*_}
echo "Starting ${component} with ID ${containerID}"
echo "PWD is: $(pwd)"
confDir="$(pwd)/conf/etc/hadoop"
umask 022
baseDir="$(pwd)/dyno-node"
# Set Hadoop's log dir to that of the NodeManager,
# then YARN will automatically help us handle the logs
# May be a comma-separated list; just take the first one
logDir=${LOG_DIRS%%,*}
pidDir="$baseDir/pid"
baseHttpPort=50075
baseRpcPort=9000
baseServiceRpcPort=9020
rm -rf "$baseDir"
mkdir -p "$pidDir"
chmod 755 "$baseDir"
chmod 700 "$pidDir"
# Set Hadoop variables for component
hadoopHome="$(find -H "$(pwd)/hadoopBinary" -mindepth 1 -maxdepth 1 -type d | head -n 1)"
# Save real environment for later
hadoopConfOriginal=${HADOOP_CONF_DIR:-$confDir}
hadoopHomeOriginal=${HADOOP_HOME:-$hadoopHome}
echo "Saving original HADOOP_HOME as: $hadoopHomeOriginal"
echo "Saving original HADOOP_CONF_DIR as: $hadoopConfOriginal"
## @description A function to perform an HDFS command under the system Hadoop
## instead of the Hadoop-under-test.
## @audience private
## @stability evolving
function hdfs_original {
HADOOP_HOME="${hadoopHomeOriginal}" HADOOP_CONF_DIR="${hadoopConfOriginal}" \
HADOOP_HDFS_HOME="${hadoopHomeOriginal}" HADOOP_COMMON_HOME="${hadoopHomeOriginal}" \
"${hadoopHomeOriginal}/bin/hdfs" "$@"
}
extraClasspathDir="$(pwd)/additionalClasspath/"
mkdir -p "${extraClasspathDir}"
# Change environment variables for the Hadoop process
export HADOOP_HOME="$hadoopHome"
export HADOOP_PREFIX="$hadoopHome"
export PATH="$HADOOP_HOME/bin:$PATH"
export HADOOP_HDFS_HOME="$hadoopHome"
export HADOOP_COMMON_HOME="$hadoopHome"
export HADOOP_YARN_HOME="$hadoopHome"
export LIBHDFS_OPTS="-Djava.library.path=$hadoopHome/lib/native"
export HADOOP_MAPRED_HOME="$hadoopHome"
export HADOOP_CONF_DIR="${confDir}"
export YARN_CONF_DIR="${confDir}"
export HADOOP_LOG_DIR="${logDir}"
export HADOOP_PID_DIR="${pidDir}"
HADOOP_CLASSPATH="$(pwd)/dependencies/*:$extraClasspathDir"
export HADOOP_CLASSPATH
echo "Environment variables are set as:"
echo "(note that this doesn't include changes made by hadoop-env.sh)"
printenv
echo -e "\n\n"
# Starting from base_port, add the last two digits of the containerID,
# then keep searching upwards for a free port
# find_available_port base_port
find_available_port() {
basePort="$1"
currPort=$((basePort+((10#$containerID)%100)))
while netstat -nl | grep -q ":${currPort}[[:space:]]"; do
currPort=$((currPort+1))
done
echo "$currPort"
}
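# For example (illustrative): with a container ID ending in 17 and a base port
# of 9000, the search starts at 9017 and walks upward until a free port is
# found.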
configOverrides=(
-D "hadoop.tmp.dir=${baseDir}"
-D "hadoop.security.authentication=simple"
-D "hadoop.security.authorization=false"
-D "dfs.http.policy=HTTP_ONLY"
-D "dfs.nameservices="
-D "dfs.web.authentication.kerberos.principal="
-D "dfs.web.authentication.kerberos.keytab="
-D "hadoop.http.filter.initializers="
-D "dfs.datanode.kerberos.principal="
-D "dfs.datanode.keytab.file="
-D "dfs.domain.socket.path="
-D "dfs.client.read.shortcircuit=false"
)
# NOTE: Must manually unset dfs.namenode.shared.edits.dir in configs
# because setting it to be empty is not enough (must be null)
if [[ "$component" = "datanode" ]]; then
if ! dataDirsOrig="$(hdfs getconf "${configOverrides[@]}" -confKey dfs.datanode.data.dir)"; then
echo "Unable to fetch data directories from config; using default"
dataDirsOrig="/data-dir/1,/data-dir/2"
fi
dataDirsOrig=(${dataDirsOrig//,/ })
dataDirs=""
for dataDir in "${dataDirsOrig[@]}"; do
stripped="file://$baseDir/${dataDir#file://}"
dataDirs="$dataDirs,$stripped"
done
dataDirs=${dataDirs:1}
echo "Going to sleep for $launchDelaySec sec..."
for _ in $(seq 1 "${launchDelaySec}"); do
sleep 1
if ! kill -0 $PPID 2>/dev/null; then
echo "Parent process ($PPID) exited while waiting; now exiting"
exit 0
fi
done
versionFile="$(pwd)/VERSION"
bpId="$(grep -e blockpoolID "${versionFile}" | awk -F= '{print $2}')"
listingFiles=()
blockDir="$(pwd)/blocks"
for listingFile in "${blockDir}"/*; do
listingFiles+=("file://${listingFile}")
done
datanodeClusterConfigs=(
-D "fs.defaultFS=${nnServiceRpcAddress}"
-D "dfs.datanode.hostname=$(hostname)"
-D "dfs.datanode.data.dir=${dataDirs}"
-D "dfs.datanode.ipc.address=0.0.0.0:0"
-D "dfs.datanode.http.address=0.0.0.0:0"
-D "dfs.datanode.address=0.0.0.0:0"
-D "dfs.datanode.directoryscan.interval=-1"
-D "fs.du.interval=43200000"
-D "fs.getspaceused.jitterMillis=21600000"
${configOverrides[@]}
"${bpId}"
${listingFiles[@]}
)
echo "Executing the following:"
echo "${HADOOP_HOME}/bin/hadoop org.apache.hadoop.tools.dynamometer.SimulatedDataNodes \
$DN_ADDITIONAL_ARGS" "${datanodeClusterConfigs[@]}"
# The argument splitting of DN_ADDITIONAL_ARGS is desirable behavior here
# shellcheck disable=SC2086
"${HADOOP_HOME}/bin/hadoop" org.apache.hadoop.tools.dynamometer.SimulatedDataNodes \
$DN_ADDITIONAL_ARGS "${datanodeClusterConfigs[@]}" &
componentPID="$!"
# Note: `$?` after a backgrounded command does not reflect whether the command
# itself succeeded, so instead verify that the DataNode cluster process is
# still alive shortly after launch.
sleep 1
if ! kill -0 "${componentPID}" 2>/dev/null; then
echo "Unable to launch DataNode cluster; exiting."
exit 1
fi
elif [[ "$component" = "namenode" ]]; then
nnHostname="${NM_HOST}"
nnRpcPort="$(find_available_port "$baseRpcPort")"
nnServiceRpcPort="$(find_available_port "$baseServiceRpcPort")"
nnHttpPort="$(find_available_port "$baseHttpPort")"
nnInfoLocalPath="$(pwd)/nn_info.prop"
rm -f "$nnInfoLocalPath"
# Port and host information to be uploaded to the non-Dynamometer HDFS
# to be consumed by the AM and Client
cat > "$nnInfoLocalPath" << EOF
NN_HOSTNAME=${nnHostname}
NN_RPC_PORT=${nnRpcPort}
NN_SERVICERPC_PORT=${nnServiceRpcPort}
NN_HTTP_PORT=${nnHttpPort}
NM_HTTP_PORT=${NM_HTTP_PORT}
CONTAINER_ID=${CONTAINER_ID}
EOF
echo "Using the following ports for the namenode:"
cat "$nnInfoLocalPath"
nnInfoRemotePath="$hdfsStoragePath/nn_info.prop"
# We use the original conf dir since we are uploading to the non-dynamometer cluster
hdfs_original dfs -copyFromLocal -f "$nnInfoLocalPath" "$nnInfoRemotePath"
echo "Uploaded namenode port info to $nnInfoRemotePath"
if [[ "$NN_FILE_METRIC_PERIOD" -gt 0 ]]; then
nnMetricOutputFileLocal="$HADOOP_LOG_DIR/namenode_metrics"
nnMetricPropsFileLocal="$extraClasspathDir/hadoop-metrics2-namenode.properties"
if [[ -f "$confDir/hadoop-metrics2-namenode.properties" ]]; then
cp "$confDir/hadoop-metrics2-namenode.properties" "$nnMetricPropsFileLocal"
chmod u+w "$nnMetricPropsFileLocal"
elif [[ -f "$confDir/hadoop-metrics2.properties" ]]; then
cp "$confDir/hadoop-metrics2.properties" "$nnMetricPropsFileLocal"
chmod u+w "$nnMetricPropsFileLocal"
fi
cat >> "$nnMetricPropsFileLocal" << EOF
namenode.sink.dyno-file.period=${NN_FILE_METRIC_PERIOD}
namenode.sink.dyno-file.class=org.apache.hadoop.metrics2.sink.FileSink
namenode.sink.dyno-file.filename=${nnMetricOutputFileLocal}
EOF
fi
nameDir="${NN_NAME_DIR:-${baseDir}/name-data}"
editsDir="${NN_EDITS_DIR:-${baseDir}/name-data}"
checkpointDir="$baseDir/checkpoint"
rm -rf "$nameDir" "$editsDir" "$checkpointDir"
mkdir -p "$nameDir/current" "$editsDir/current" "$checkpointDir"
chmod -R 700 "$nameDir" "$editsDir" "$checkpointDir"
fsImageFile="$(find "$(pwd)" -mindepth 1 -maxdepth 1 -name "fsimage_*" | tail -n 1)"
fsImageMD5File="$(find "$(pwd)" -mindepth 1 -maxdepth 1 -name "fsimage_*.md5" | tail -n 1)"
ln -snf "$fsImageFile" "$nameDir/current/$(basename "$fsImageFile")"
ln -snf "$fsImageMD5File" "$nameDir/current/$(basename "$fsImageMD5File")"
ln -snf "$(pwd)/VERSION" "$nameDir/current/VERSION"
chmod 700 "$nameDir"/current/*
namenodeConfigs=(
-D "fs.defaultFS=hdfs://${nnHostname}:${nnRpcPort}"
-D "dfs.namenode.rpc-address=${nnHostname}:${nnRpcPort}"
-D "dfs.namenode.servicerpc-address=${nnHostname}:${nnServiceRpcPort}"
-D "dfs.namenode.http-address=${nnHostname}:${nnHttpPort}"
-D "dfs.namenode.https-address=${nnHostname}:0"
-D "dfs.namenode.name.dir=file://${nameDir}"
-D "dfs.namenode.edits.dir=file://${editsDir}"
-D "dfs.namenode.checkpoint.dir=file://${baseDir}/checkpoint"
-D "dfs.namenode.kerberos.internal.spnego.principal="
-D "dfs.hosts="
-D "dfs.hosts.exclude="
-D "dfs.namenode.legacy-oiv-image.dir="
-D "dfs.namenode.kerberos.principal="
-D "dfs.namenode.keytab.file="
-D "dfs.namenode.safemode.threshold-pct=0.0f"
-D "dfs.permissions.enabled=true"
-D "dfs.cluster.administrators=\"*\""
-D "dfs.block.replicator.classname=org.apache.hadoop.tools.dynamometer.BlockPlacementPolicyAlwaysSatisfied"
-D "hadoop.security.impersonation.provider.class=org.apache.hadoop.tools.dynamometer.AllowAllImpersonationProvider"
${configOverrides[@]}
)
echo "Executing the following:"
echo "${HADOOP_HOME}/sbin/hadoop-daemon.sh start namenode" "${namenodeConfigs[@]}" "$NN_ADDITIONAL_ARGS"
# The argument splitting of NN_ADDITIONAL_ARGS is desirable behavior here
# shellcheck disable=SC2086
if ! "${HADOOP_HOME}/sbin/hadoop-daemon.sh" start namenode "${namenodeConfigs[@]}" $NN_ADDITIONAL_ARGS; then
echo "Unable to launch NameNode; exiting."
exit 1
fi
componentPIDFile="${pidDir}/hadoop-$(whoami)-${component}.pid"
while [[ ! -f "$componentPIDFile" ]]; do sleep 1; done
componentPID=$(cat "$componentPIDFile")
if [[ "$NN_FILE_METRIC_PERIOD" -gt 0 ]]; then
nnMetricOutputFileRemote="$hdfsStoragePath/namenode_metrics"
echo "Going to attempt to upload metrics to: $nnMetricOutputFileRemote"
touch "$nnMetricOutputFileLocal"
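# Stream the local metrics file to the remote HDFS path: tail -f follows the
# file while its PID is written to fd 3 (metricsTailPIDFile) so that the
# streaming process can be stopped during cleanup.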
(tail -n 999999 -f "$nnMetricOutputFileLocal" & echo $! >&3) 3>metricsTailPIDFile | \
hdfs_original dfs -appendToFile - "$nnMetricOutputFileRemote" &
metricsTailPID="$(cat metricsTailPIDFile)"
if [[ "$metricsTailPID" = "" ]]; then
echo "Unable to upload metrics to HDFS"
else
echo "Metrics will be uploaded to HDFS by PID: $metricsTailPID"
fi
fi
fi
echo "Started $component at pid $componentPID"
## @description Perform cleanup, killing any outstanding processes and deleting files
## @audience private
## @stability evolving
function cleanup {
echo "Cleaning up $component at pid $componentPID"
kill -9 "$componentPID"
if [[ "$metricsTailPID" != "" ]]; then
echo "Stopping metrics streaming at pid $metricsTailPID"
kill "$metricsTailPID"
fi
echo "Deleting any remaining files"
rm -rf "$baseDir"
}
trap cleanup EXIT
echo "Waiting for parent process (PID: $PPID) OR $component process to exit"
while kill -0 "${componentPID}" 2>/dev/null && kill -0 $PPID 2>/dev/null; do
sleep 1
done
if kill -0 $PPID 2>/dev/null; then
echo "$component process exited; continuing to finish"
exit 1
else
echo "Parent process exited; continuing to finish"
exit 0
fi

View File

@ -0,0 +1,542 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer;
import com.google.common.collect.Sets;
import java.util.Optional;
import java.util.function.Supplier;
import org.apache.hadoop.test.PlatformAssumptions;
import org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditLogDirectParser;
import org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditReplayMapper;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.util.JarFinder;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.MiniYARNCluster;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.hadoop.tools.dynamometer.DynoInfraUtils.fetchHadoopTarball;
import static org.apache.hadoop.hdfs.MiniDFSCluster.PROP_TEST_BUILD_DATA;
import static org.hamcrest.CoreMatchers.notNullValue;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
/**
* Start a Dynamometer cluster in a MiniYARNCluster. Ensure that the NameNode is
* able to start correctly, exit safemode, and run some commands. Subsequently
* the workload job is launched and it is verified that it completes
* successfully and is able to replay commands as expected.
*
 * To run this test, JAVA_HOME must be set correctly, and the {@code tar} utility
* must be available.
*
* You can optionally specify which version of HDFS should be started within the
* Dynamometer cluster; the default is {@value HADOOP_BIN_VERSION_DEFAULT}. This
* can be adjusted by setting the {@value HADOOP_BIN_VERSION_KEY} property. This
* will automatically download the correct Hadoop tarball for the specified
* version. It downloads from an Apache mirror (by default
* {@value DynoInfraUtils#APACHE_DOWNLOAD_MIRROR_DEFAULT}); which mirror is used
* can be controlled with the {@value DynoInfraUtils#APACHE_DOWNLOAD_MIRROR_KEY}
* property. Note that mirrors normally contain only the latest releases on any
* given release line; you may need to use
* {@code http://archive.apache.org/dist/} for older releases. The downloaded
* tarball will be stored in the test directory and can be reused between test
* executions. Alternatively, you can specify the {@value HADOOP_BIN_PATH_KEY}
* property to point directly to a Hadoop tarball which is present locally and
* no download will occur.
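 *
 * For example, a locally available tarball can be supplied by setting the
 * following system property on the JVM running the test (the path here is
 * purely illustrative):
 * <pre>{@code
 * -Ddyno.hadoop.bin.path=/path/to/hadoop-3.1.1.tar.gz
 * }</pre>
 * Alternatively, a specific version to download can be requested with
 * {@code -Ddyno.hadoop.bin.version=3.1.1}.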
*/
public class TestDynamometerInfra {
private static final Logger LOG =
LoggerFactory.getLogger(TestDynamometerInfra.class);
private static final int MINICLUSTER_NUM_NMS = 3;
private static final int MINICLUSTER_NUM_DNS = 1;
private static final String HADOOP_BIN_PATH_KEY = "dyno.hadoop.bin.path";
private static final String HADOOP_BIN_VERSION_KEY =
"dyno.hadoop.bin.version";
private static final String HADOOP_BIN_VERSION_DEFAULT = "3.1.1";
private static final String FSIMAGE_FILENAME = "fsimage_0000000000000061740";
private static final String VERSION_FILENAME = "VERSION";
private static final String HADOOP_BIN_UNPACKED_DIR_PREFIX =
"hadoop_unpacked_";
private static final String NAMENODE_NODELABEL = "dyno_namenode";
private static final String DATANODE_NODELABEL = "dyno_datanode";
private static MiniDFSCluster miniDFSCluster;
private static MiniYARNCluster miniYARNCluster;
private static YarnClient yarnClient;
private static FileSystem fs;
private static Configuration conf;
private static Configuration yarnConf;
private static Path fsImageTmpPath;
private static Path fsVersionTmpPath;
private static Path blockImageOutputDir;
private static Path auditTraceDir;
private static Path confZip;
private static File testBaseDir;
private static File hadoopTarballPath;
private static File hadoopUnpackedDir;
private ApplicationId infraAppId;
@BeforeClass
public static void setupClass() throws Exception {
PlatformAssumptions.assumeNotWindows("Dynamometer will not run on Windows");
Assume.assumeThat("JAVA_HOME must be set properly",
System.getenv("JAVA_HOME"), notNullValue());
try {
Shell.ShellCommandExecutor tarCheck = new Shell.ShellCommandExecutor(
new String[]{"bash", "-c", "command -v tar"});
tarCheck.execute();
Assume.assumeTrue("tar command is not available",
tarCheck.getExitCode() == 0);
} catch (IOException ioe) {
Assume.assumeNoException("Unable to execute a shell command", ioe);
}
conf = new Configuration();
// Follow the conventions of MiniDFSCluster
testBaseDir = new File(
System.getProperty(PROP_TEST_BUILD_DATA, "build/test/data"));
String hadoopBinVersion = System.getProperty(HADOOP_BIN_VERSION_KEY,
HADOOP_BIN_VERSION_DEFAULT);
if (System.getProperty(HADOOP_BIN_PATH_KEY) == null) {
hadoopTarballPath = fetchHadoopTarball(testBaseDir, hadoopBinVersion,
conf, LOG);
} else {
hadoopTarballPath = new File(System.getProperty(HADOOP_BIN_PATH_KEY));
}
if (testBaseDir.exists()) {
// Delete any old unpacked bin dirs that weren't previously cleaned up
File[] oldUnpackedDirs = testBaseDir.listFiles(
(dir, name) -> name.startsWith(HADOOP_BIN_UNPACKED_DIR_PREFIX));
if (oldUnpackedDirs != null) {
for (File oldDir : oldUnpackedDirs) {
FileUtils.deleteQuietly(oldDir);
}
}
}
// Set up the Hadoop binary to be used as the system-level Hadoop install
hadoopUnpackedDir = new File(testBaseDir,
HADOOP_BIN_UNPACKED_DIR_PREFIX + UUID.randomUUID());
assertTrue("Failed to make temporary directory",
hadoopUnpackedDir.mkdirs());
Shell.ShellCommandExecutor shexec = new Shell.ShellCommandExecutor(
new String[] {"tar", "xzf", hadoopTarballPath.getAbsolutePath(), "-C",
hadoopUnpackedDir.getAbsolutePath()});
shexec.execute();
if (shexec.getExitCode() != 0) {
fail("Unable to execute tar to expand Hadoop binary");
}
conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128);
conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);
for (String q : new String[] {"root", "root.default"}) {
conf.setInt(CapacitySchedulerConfiguration.PREFIX + q + "."
+ CapacitySchedulerConfiguration.CAPACITY, 100);
String accessibleNodeLabelPrefix = CapacitySchedulerConfiguration.PREFIX
+ q + "." + CapacitySchedulerConfiguration.ACCESSIBLE_NODE_LABELS;
conf.set(accessibleNodeLabelPrefix,
CapacitySchedulerConfiguration.ALL_ACL);
conf.setInt(accessibleNodeLabelPrefix + "." + DATANODE_NODELABEL + "."
+ CapacitySchedulerConfiguration.CAPACITY, 100);
conf.setInt(accessibleNodeLabelPrefix + "." + NAMENODE_NODELABEL + "."
+ CapacitySchedulerConfiguration.CAPACITY, 100);
}
// This is necessary to have the RM respect our vcore allocation request
conf.setClass(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS,
DominantResourceCalculator.class, ResourceCalculator.class);
conf.setBoolean(YarnConfiguration.NM_DISK_HEALTH_CHECK_ENABLE, false);
miniYARNCluster = new MiniYARNCluster(TestDynamometerInfra.class.getName(),
1, MINICLUSTER_NUM_NMS, 1, 1);
miniYARNCluster.init(conf);
miniYARNCluster.start();
yarnConf = miniYARNCluster.getConfig();
miniDFSCluster = new MiniDFSCluster.Builder(conf).format(true)
.numDataNodes(MINICLUSTER_NUM_DNS).build();
miniDFSCluster.waitClusterUp();
FileSystem.setDefaultUri(conf, miniDFSCluster.getURI());
FileSystem.setDefaultUri(yarnConf, miniDFSCluster.getURI());
fs = miniDFSCluster.getFileSystem();
URL url = Thread.currentThread().getContextClassLoader()
.getResource("yarn-site.xml");
if (url == null) {
throw new RuntimeException(
"Could not find 'yarn-site.xml' dummy file in classpath");
}
yarnConf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH,
new File(url.getPath()).getParent());
// Write the XML to a buffer before writing to the file. writeXml() can
// trigger a read of the existing yarn-site.xml, so writing directly could
// trigger a read of the file while it is in an inconsistent state
// (partially written)
try (ByteArrayOutputStream bytesOut = new ByteArrayOutputStream()) {
yarnConf.writeXml(bytesOut);
try (OutputStream fileOut = new FileOutputStream(
new File(url.getPath()))) {
fileOut.write(bytesOut.toByteArray());
}
}
yarnClient = YarnClient.createYarnClient();
yarnClient.init(new Configuration(yarnConf));
yarnClient.start();
fsImageTmpPath = fs.makeQualified(new Path("/tmp/" + FSIMAGE_FILENAME));
fsVersionTmpPath = fs.makeQualified(new Path("/tmp/" + VERSION_FILENAME));
blockImageOutputDir = fs.makeQualified(new Path("/tmp/blocks"));
auditTraceDir = fs.makeQualified(new Path("/tmp/audit_trace_direct"));
confZip = fs.makeQualified(new Path("/tmp/conf.zip"));
uploadFsimageResourcesToHDFS(hadoopBinVersion);
miniYARNCluster.waitForNodeManagersToConnect(30000);
RMNodeLabelsManager nodeLabelManager = miniYARNCluster.getResourceManager()
.getRMContext().getNodeLabelManager();
nodeLabelManager.addToCluserNodeLabelsWithDefaultExclusivity(
Sets.newHashSet(NAMENODE_NODELABEL, DATANODE_NODELABEL));
Map<NodeId, Set<String>> nodeLabels = new HashMap<>();
nodeLabels.put(miniYARNCluster.getNodeManager(0).getNMContext().getNodeId(),
Sets.newHashSet(NAMENODE_NODELABEL));
nodeLabels.put(miniYARNCluster.getNodeManager(1).getNMContext().getNodeId(),
Sets.newHashSet(DATANODE_NODELABEL));
nodeLabelManager.addLabelsToNode(nodeLabels);
}
@AfterClass
public static void teardownClass() throws Exception {
if (miniDFSCluster != null) {
miniDFSCluster.shutdown(true);
miniDFSCluster = null;
}
if (yarnClient != null) {
yarnClient.stop();
yarnClient = null;
}
if (miniYARNCluster != null) {
miniYARNCluster.getResourceManager().stop();
miniYARNCluster.getResourceManager().waitForServiceToStop(30000);
miniYARNCluster.stop();
miniYARNCluster.waitForServiceToStop(30000);
FileUtils.deleteDirectory(miniYARNCluster.getTestWorkDir());
miniYARNCluster = null;
}
if (hadoopUnpackedDir != null) {
FileUtils.deleteDirectory(hadoopUnpackedDir);
}
}
@After
public void tearDown() throws Exception {
if (infraAppId != null && yarnClient != null) {
yarnClient.killApplication(infraAppId);
}
infraAppId = null;
}
@Test(timeout = 15 * 60 * 1000)
public void testNameNodeInYARN() throws Exception {
final Client client = new Client(JarFinder.getJar(ApplicationMaster.class),
JarFinder.getJar(Assert.class));
Configuration localConf = new Configuration(yarnConf);
localConf.setLong(AuditLogDirectParser.AUDIT_START_TIMESTAMP_KEY, 60000);
client.setConf(localConf);
Thread appThread = new Thread(() -> {
try {
client.run(new String[] {"-" + Client.MASTER_MEMORY_MB_ARG, "128",
"-" + Client.CONF_PATH_ARG, confZip.toString(),
"-" + Client.BLOCK_LIST_PATH_ARG,
blockImageOutputDir.toString(), "-" + Client.FS_IMAGE_DIR_ARG,
fsImageTmpPath.getParent().toString(),
"-" + Client.HADOOP_BINARY_PATH_ARG,
hadoopTarballPath.getAbsolutePath(),
"-" + AMOptions.DATANODES_PER_CLUSTER_ARG, "2",
"-" + AMOptions.DATANODE_MEMORY_MB_ARG, "128",
"-" + AMOptions.DATANODE_NODELABEL_ARG, DATANODE_NODELABEL,
"-" + AMOptions.NAMENODE_MEMORY_MB_ARG, "256",
"-" + AMOptions.NAMENODE_METRICS_PERIOD_ARG, "1",
"-" + AMOptions.NAMENODE_NODELABEL_ARG, NAMENODE_NODELABEL,
"-" + AMOptions.SHELL_ENV_ARG,
"HADOOP_HOME=" + getHadoopHomeLocation(),
"-" + AMOptions.SHELL_ENV_ARG,
"HADOOP_CONF_DIR=" + getHadoopHomeLocation() + "/etc/hadoop",
"-" + Client.WORKLOAD_REPLAY_ENABLE_ARG,
"-" + Client.WORKLOAD_INPUT_PATH_ARG,
fs.makeQualified(new Path("/tmp/audit_trace_direct")).toString(),
"-" + Client.WORKLOAD_THREADS_PER_MAPPER_ARG, "1",
"-" + Client.WORKLOAD_START_DELAY_ARG, "10s",
"-" + AMOptions.NAMENODE_ARGS_ARG,
"-Ddfs.namenode.safemode.extension=0"});
} catch (Exception e) {
LOG.error("Error running client", e);
}
});
appThread.start();
LOG.info("Waiting for application ID to become available");
GenericTestUtils.waitFor(() -> {
try {
List<ApplicationReport> apps = yarnClient.getApplications();
if (apps.size() == 1) {
infraAppId = apps.get(0).getApplicationId();
return true;
} else if (apps.size() > 1) {
fail("Unexpected: more than one application");
}
} catch (IOException | YarnException e) {
fail("Unexpected exception: " + e);
}
return false;
}, 1000, 60000);
Supplier<Boolean> falseSupplier = () -> false;
Optional<Properties> namenodeProperties = DynoInfraUtils
.waitForAndGetNameNodeProperties(falseSupplier, localConf,
client.getNameNodeInfoPath(), LOG);
if (!namenodeProperties.isPresent()) {
fail("Unable to fetch NameNode properties");
}
DynoInfraUtils.waitForNameNodeReadiness(namenodeProperties.get(), 3, false,
falseSupplier, localConf, LOG);
// Test that we can successfully write to / read from the cluster
try {
URI nameNodeUri =
DynoInfraUtils.getNameNodeHdfsUri(namenodeProperties.get());
DistributedFileSystem dynoFS =
(DistributedFileSystem) FileSystem.get(nameNodeUri, localConf);
Path testFile = new Path("/tmp/test/foo");
dynoFS.mkdir(testFile.getParent(), FsPermission.getDefault());
FSDataOutputStream out = dynoFS.create(testFile, (short) 1);
out.write(42);
out.hsync();
out.close();
FileStatus[] stats = dynoFS.listStatus(testFile.getParent());
assertEquals(1, stats.length);
assertEquals("foo", stats[0].getPath().getName());
} catch (IOException e) {
LOG.error("Failed to write or read", e);
throw e;
}
Map<ContainerId, Container> namenodeContainers = miniYARNCluster
.getNodeManager(0).getNMContext().getContainers();
Map<ContainerId, Container> datanodeContainers = miniYARNCluster
.getNodeManager(1).getNMContext().getContainers();
Map<ContainerId, Container> amContainers = miniYARNCluster.getNodeManager(2)
.getNMContext().getContainers();
assertEquals(1, namenodeContainers.size());
assertEquals(2,
namenodeContainers.keySet().iterator().next().getContainerId());
assertEquals(2, datanodeContainers.size());
assertEquals(1, amContainers.size());
assertEquals(1, amContainers.keySet().iterator().next().getContainerId());
LOG.info("Waiting for workload job to start and complete");
GenericTestUtils.waitFor(() -> {
try {
return client.getWorkloadJob() != null
&& client.getWorkloadJob().isComplete();
} catch (IOException | IllegalStateException e) {
return false;
}
}, 3000, 60000);
LOG.info("Workload job completed");
if (!client.getWorkloadJob().isSuccessful()) {
fail("Workload job failed");
}
Counters counters = client.getWorkloadJob().getCounters();
assertEquals(6,
counters.findCounter(AuditReplayMapper.REPLAYCOUNTERS.TOTALCOMMANDS)
.getValue());
assertEquals(1,
counters
.findCounter(AuditReplayMapper.REPLAYCOUNTERS.TOTALINVALIDCOMMANDS)
.getValue());
LOG.info("Waiting for infra application to exit");
GenericTestUtils.waitFor(() -> {
try {
ApplicationReport report = yarnClient
.getApplicationReport(infraAppId);
return report
.getYarnApplicationState() == YarnApplicationState.KILLED;
} catch (IOException | YarnException e) {
return false;
}
}, 3000, 300000);
LOG.info("Waiting for metrics file to be ready");
// Try to read the metrics file
Path hdfsStoragePath = new Path(fs.getHomeDirectory(),
DynoConstants.DYNAMOMETER_STORAGE_DIR + "/" + infraAppId);
final Path metricsPath = new Path(hdfsStoragePath, "namenode_metrics");
GenericTestUtils.waitFor(() -> {
try {
FSDataInputStream in = fs.open(metricsPath);
String metricsOutput = in.readUTF();
in.close();
// Just assert that there is some metrics content in there
assertTrue(metricsOutput.contains("JvmMetrics"));
return true;
} catch (IOException ioe) {
return false;
}
}, 3000, 60000);
}
private static URI getResourcePath(String resourceName) {
try {
return TestDynamometerInfra.class.getClassLoader()
.getResource(resourceName).toURI();
} catch (URISyntaxException e) {
return null;
}
}
/**
* Get the Hadoop home location (i.e. for {@code HADOOP_HOME}) as the only
* directory within the unpacked location of the Hadoop tarball.
*
* @return The absolute path to the Hadoop home directory.
*/
private String getHadoopHomeLocation() {
File[] files = hadoopUnpackedDir.listFiles();
if (files == null || files.length != 1) {
fail("Should be 1 directory within the Hadoop unpacked dir");
}
return files[0].getAbsolutePath();
}
/**
* Look for the resource files relevant to {@code hadoopBinVersion} and upload
* them onto the MiniDFSCluster's HDFS for use by the subsequent jobs.
*
* @param hadoopBinVersion
* The version string (e.g. "3.1.1") for which to look for resources.
*/
private static void uploadFsimageResourcesToHDFS(String hadoopBinVersion)
throws IOException {
// Keep only the major/minor version for the resources path
String[] versionComponents = hadoopBinVersion.split("\\.");
if (versionComponents.length < 2) {
fail(
"At least major and minor version are required to be specified; got: "
+ hadoopBinVersion);
}
String hadoopResourcesPath = "hadoop_" + versionComponents[0] + "_"
+ versionComponents[1];
String fsImageResourcePath = hadoopResourcesPath + "/" + FSIMAGE_FILENAME;
fs.copyFromLocalFile(new Path(getResourcePath(fsImageResourcePath)),
fsImageTmpPath);
fs.copyFromLocalFile(
new Path(getResourcePath(fsImageResourcePath + ".md5")),
fsImageTmpPath.suffix(".md5"));
fs.copyFromLocalFile(
new Path(getResourcePath(hadoopResourcesPath + "/" + VERSION_FILENAME)),
fsVersionTmpPath);
fs.mkdirs(auditTraceDir);
IOUtils.copyBytes(
TestDynamometerInfra.class.getClassLoader()
.getResourceAsStream("audit_trace_direct/audit0"),
fs.create(new Path(auditTraceDir, "audit0")), conf, true);
fs.mkdirs(blockImageOutputDir);
for (String blockFile : new String[] {"dn0-a-0-r-00000", "dn1-a-0-r-00001",
"dn2-a-0-r-00002"}) {
IOUtils.copyBytes(
TestDynamometerInfra.class.getClassLoader()
.getResourceAsStream("blocks/" + blockFile),
fs.create(new Path(blockImageOutputDir, blockFile)), conf, true);
}
File tempConfZip = new File(testBaseDir, "conf.zip");
ZipOutputStream zos = new ZipOutputStream(
new FileOutputStream(tempConfZip));
for (String file : new String[] {"core-site.xml", "hdfs-site.xml",
"log4j.properties"}) {
zos.putNextEntry(new ZipEntry("etc/hadoop/" + file));
InputStream is = TestDynamometerInfra.class.getClassLoader()
.getResourceAsStream("conf/etc/hadoop/" + file);
IOUtils.copyBytes(is, zos, conf, false);
is.close();
zos.closeEntry();
}
zos.close();
fs.copyFromLocalFile(new Path(tempConfZip.toURI()), confZip);
tempConfZip.delete();
}
}

View File

@ -0,0 +1,66 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer;
import java.util.Set;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/** Tests for {@link DynoInfraUtils}. */
public class TestDynoInfraUtils {
private static final Logger LOG =
LoggerFactory.getLogger(TestDynoInfraUtils.class);
@Test
public void testParseStaleDatanodeListSingleDatanode() throws Exception {
// Confirm all types of values can be properly parsed
String json = "{"
+ "\"1.2.3.4:5\": {"
+ " \"numBlocks\": 5,"
+ " \"fooString\":\"stringValue\","
+ " \"fooInteger\": 1,"
+ " \"fooFloat\": 1.0,"
+ " \"fooArray\": []"
+ "}"
+ "}";
Set<String> out = DynoInfraUtils.parseStaleDataNodeList(json, 10, LOG);
assertEquals(1, out.size());
assertTrue(out.contains("1.2.3.4:5"));
}
@Test
public void testParseStaleDatanodeListMultipleDatanodes() throws Exception {
String json = "{"
+ "\"1.2.3.4:1\": {\"numBlocks\": 0}, "
+ "\"1.2.3.4:2\": {\"numBlocks\": 15}, "
+ "\"1.2.3.4:3\": {\"numBlocks\": 5}, "
+ "\"1.2.3.4:4\": {\"numBlocks\": 10} "
+ "}";
Set<String> out = DynoInfraUtils.parseStaleDataNodeList(json, 10, LOG);
assertEquals(2, out.size());
assertTrue(out.contains("1.2.3.4:1"));
assertTrue(out.contains("1.2.3.4:3"));
}
}

View File

@ -0,0 +1,34 @@
1073751825,11001,4
1073751921,11097,4
1073751918,11094,4
1073751915,11091,4
1073751912,11088,4
1073751909,11085,4
1073751906,11082,4
1073751903,11079,4
1073751900,11076,4
1073751897,11073,4
1073751894,11070,4
1073751891,11067,4
1073751888,11064,4
1073751885,11061,4
1073751882,11058,4
1073751879,11055,4
1073751876,11052,4
1073751924,11100,4
1073751873,11049,4
1073751870,11046,4
1073751867,11043,4
1073751864,11040,4
1073751861,11037,4
1073751858,11034,4
1073751855,11031,4
1073751852,11028,4
1073751849,11025,4
1073751846,11022,4
1073751843,11019,4
1073751840,11016,4
1073751837,11013,4
1073751834,11010,4
1073751831,11007,4
1073751828,11004,4

View File

@ -0,0 +1,33 @@
1073751847,11023,4
1073751922,11098,4
1073751841,11017,4
1073751829,11005,4
1073751919,11095,4
1073751856,11032,4
1073751886,11062,4
1073751916,11092,4
1073751871,11047,4
1073751880,11056,4
1073751913,11089,4
1073751826,11002,4
1073751868,11044,4
1073751910,11086,4
1073751853,11029,4
1073751838,11014,4
1073751907,11083,4
1073751889,11065,4
1073751865,11041,4
1073751904,11080,4
1073751874,11050,4
1073751877,11053,4
1073751901,11077,4
1073751850,11026,4
1073751862,11038,4
1073751898,11074,4
1073751832,11008,4
1073751883,11059,4
1073751895,11071,4
1073751844,11020,4
1073751859,11035,4
1073751892,11068,4
1073751835,11011,4

View File

@ -0,0 +1,33 @@
1073751890,11066,4
1073751887,11063,4
1073751845,11021,4
1073751884,11060,4
1073751881,11057,4
1073751830,11006,4
1073751878,11054,4
1073751842,11018,4
1073751875,11051,4
1073751872,11048,4
1073751839,11015,4
1073751869,11045,4
1073751866,11042,4
1073751863,11039,4
1073751836,11012,4
1073751860,11036,4
1073751857,11033,4
1073751920,11096,4
1073751917,11093,4
1073751827,11003,4
1073751914,11090,4
1073751854,11030,4
1073751911,11087,4
1073751908,11084,4
1073751833,11009,4
1073751905,11081,4
1073751851,11027,4
1073751902,11078,4
1073751899,11075,4
1073751896,11072,4
1073751848,11024,4
1073751893,11069,4
1073751923,11099,4

View File

@ -0,0 +1,20 @@
<?xml version='1.0' encoding='UTF-8'?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<name>hadoop.security.impersonation.provider.class</name>
<value>org.apache.hadoop.tools.dynamometer.AllowAllImpersonationProvider</value>
</property>
</configuration>

View File

@ -0,0 +1,20 @@
<?xml version='1.0' encoding='UTF-8'?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///dynamometer/dfs/data1,file:///dynamometer/dfs/data2</value>
</property>
</configuration>

View File

@ -0,0 +1,26 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
hadoop.root.logger=INFO,RFA
hadoop.log.dir=.
hadoop.log.file=hadoop.log
log4j.rootLogger=${hadoop.root.logger}
log4j.appender.RFA=org.apache.log4j.ConsoleAppender
log4j.appender.RFA.target=System.err
log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout

View File

@ -0,0 +1,7 @@
#Wed Jan 09 11:28:34 PST 2019
namespaceID=1157798435
clusterID=CID-91db90f5-16f6-4309-a88e-263223234059
cTime=1547062025277
storageType=NAME_NODE
blockpoolID=BP-963098314-0.0.0.0-1494453277231
layoutVersion=-64

View File

@ -0,0 +1 @@
211ac2f198a2d8f0488638d3b11f5238 *fsimage_0000000000000061740

View File

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Dummy (invalid) config file to be overwritten by tests with MiniCluster configuration. -->
</configuration>

View File

@ -0,0 +1,146 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-project</artifactId>
<version>3.3.0-SNAPSHOT</version>
<relativePath>../../../hadoop-project</relativePath>
</parent>
<artifactId>hadoop-dynamometer-workload</artifactId>
<version>3.3.0-SNAPSHOT</version>
<description>Apache Hadoop Dynamometer Workload Simulator</description>
<name>Apache Hadoop Dynamometer Workload Simulator</name>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<configuration>
<attach>true</attach>
</configuration>
<executions>
<execution>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>test-jar</goal>
</goals>
<phase>test-compile</phase>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<configuration>
<excludes>
<exclude>src/test/resources/audit_trace_direct/audit0</exclude>
<exclude>src/test/resources/audit_trace_hive/audit0</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>dist</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-assemblies</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<executions>
<execution>
<id>dist</id>
<phase>prepare-package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<appendAssemblyId>false</appendAssemblyId>
<attach>false</attach>
<finalName>${project.artifactId}-${project.version}</finalName>
<descriptorRefs>
<descriptorRef>hadoop-dynamometer-workload</descriptorRef>
</descriptorRefs>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>deplist</id>
<phase>compile</phase>
<goals>
<goal>list</goal>
</goals>
<configuration>
<!-- referenced by a built-in command -->
<outputFile>${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-builtin.txt</outputFile>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

View File

@ -0,0 +1,52 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script determines the timestamp of the last transaction appearing in a
# given fsimage by looking at the corresponding edits file. This is useful for
# determining where to start collecting audit logs to replay against the fsimage.
if [ $# -lt 1 ] || [ "$1" == "-h" ] || [ "$1" == "--help" ]; then
echo "Usage: ./parse-start-timestamp.sh image-txid [ edits-dir ]"
echo " Finds the last timestamp present in the edit file which ends in"
echo " the specified transaction ID (leading 0s not required)."
echo " If edits-dir is specified, looks for edit files under"
echo " edits-dir/current. Otherwise, looks in the current directory."
exit 1
fi
if [[ $(command -v gawk) == "" ]]; then
echo "This script requires gawk to be available."
exit 1
fi
image_txid="$1"
if [[ $# -ge 2 ]]; then
edits_dir="$2/current"
else
edits_dir="$(pwd)"
fi
edits_file_count="$(find -H "${edits_dir}" -depth 1 -type f -name "edits_*-*$image_txid" | wc -l)"
if [[ "$edits_file_count" != 1 ]]; then
echo "Error; found $edits_file_count matching edit files."
exit 1
fi
edits_file="$(find -H "${edits_dir}" -depth 1 -type f -name "edits_*-*$image_txid")"
# Shellcheck complains about the $ in the single-quote because it won't expand, but this is intentional
# shellcheck disable=SC2016
awk_script='/TIMESTAMP/ { line=$0 } END { match(line, />([[:digit:]]+)</, output); print output[1] }'
echo "Start timestamp for $image_txid is: (this may take a moment)"
# $edits_file already contains the full path as returned by find above
hdfs oev -i "$edits_file" -o >(gawk "$awk_script")

View File

@ -0,0 +1,42 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script simply passes its arguments along to the workload driver
# after finding a hadoop command in PATH/HADOOP_COMMON_HOME/HADOOP_HOME
# (searching in that order).
if type hadoop &> /dev/null; then
hadoop_cmd="hadoop"
elif type "$HADOOP_COMMON_HOME/bin/hadoop" &> /dev/null; then
hadoop_cmd="$HADOOP_COMMON_HOME/bin/hadoop"
elif type "$HADOOP_HOME/bin/hadoop" &> /dev/null; then
hadoop_cmd="$HADOOP_HOME/bin/hadoop"
else
echo "Unable to find a valid hadoop command to execute; exiting."
exit 1
fi
script_pwd="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/.."
for f in ${script_pwd}/lib/*.jar; do
# Skip adding the workload JAR since it is added by the `hadoop jar` command
if [[ "$f" != *"dynamometer-workload-"* ]]; then
export HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$f"
fi
done
"$hadoop_cmd" jar "${script_pwd}"/lib/dynamometer-workload-*.jar \
org.apache.hadoop.tools.dynamometer.workloadgenerator.WorkloadDriver "$@"

View File

@ -0,0 +1,146 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * CreateFileMapper continuously creates 1-byte files for the specified duration
* to increase the number of file objects on the NN.
*
* <p/>Configuration options available:
* <ul>
* <li>{@value NUM_MAPPERS_KEY} (required): Number of mappers to launch.</li>
* <li>{@value DURATION_MIN_KEY} (required): Number of minutes to induce
* workload for.</li>
* <li>{@value SHOULD_DELETE_KEY} (default: {@value SHOULD_DELETE_DEFAULT}):
* If true, delete the files after creating them. This can be useful for
* generating constant load without increasing the number of file
* objects.</li>
* <li>{@value FILE_PARENT_PATH_KEY} (default:
* {@value FILE_PARENT_PATH_DEFAULT}): The root directory in which to
* create files.</li>
* </ul>
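 *
 * <p/>For example, a hypothetical invocation via the workload driver CLI
 * (see {@link WorkloadDriver} for the full argument list; the NameNode host
 * and port are placeholders):
 * <pre>{@code
 * ./start-workload -Dcreatefile.num-mappers=4 -Dcreatefile.duration-min=10 \
 *     -mapper_class_name CreateFileMapper -nn_uri hdfs://<namenode>:<port>/
 * }</pre>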
*/
public class CreateFileMapper
extends WorkloadMapper<NullWritable, NullWritable> {
public static final String NUM_MAPPERS_KEY = "createfile.num-mappers";
public static final String DURATION_MIN_KEY = "createfile.duration-min";
public static final String FILE_PARENT_PATH_KEY =
"createfile.file-parent-path";
public static final String FILE_PARENT_PATH_DEFAULT = "/tmp/createFileMapper";
public static final String SHOULD_DELETE_KEY = "createfile.should-delete";
public static final boolean SHOULD_DELETE_DEFAULT = false;
/** Custom {@link org.apache.hadoop.mapreduce.Counter} definitions. */
public enum CREATEFILECOUNTERS {
NUMFILESCREATED
}
private long startTimestampMs;
private FileSystem fs;
private Configuration conf;
private int taskID;
private String fileParentPath;
private boolean shouldDelete;
private long endTimeStampMs;
@Override
public String getDescription() {
return "This mapper creates 1-byte files for the specified duration.";
}
@Override
public List<String> getConfigDescriptions() {
return Lists.newArrayList(
NUM_MAPPERS_KEY + " (required): Number of mappers to launch.",
DURATION_MIN_KEY
+ " (required): Number of minutes to induce workload for.",
SHOULD_DELETE_KEY + " (default: " + SHOULD_DELETE_DEFAULT
+ "): If true, delete the files after creating "
+ "them. This can be useful for generating constant load without "
+ "increasing the number of file objects.",
FILE_PARENT_PATH_KEY + " (default: " + FILE_PARENT_PATH_DEFAULT
+ "): The root directory in which to create files.");
}
@Override
public boolean verifyConfigurations(Configuration confToVerify) {
return confToVerify.get(NUM_MAPPERS_KEY) != null
&& confToVerify.get(DURATION_MIN_KEY) != null;
}
@Override
public void map(NullWritable key, NullWritable value,
Mapper.Context mapperContext) throws IOException, InterruptedException {
taskID = mapperContext.getTaskAttemptID().getTaskID().getId();
conf = mapperContext.getConfiguration();
String namenodeURI = conf.get(WorkloadDriver.NN_URI);
startTimestampMs = conf.getLong(WorkloadDriver.START_TIMESTAMP_MS, -1);
fileParentPath = conf.get(FILE_PARENT_PATH_KEY, FILE_PARENT_PATH_DEFAULT);
shouldDelete = conf.getBoolean(SHOULD_DELETE_KEY, SHOULD_DELETE_DEFAULT);
int durationMin = conf.getInt(DURATION_MIN_KEY, -1);
if (durationMin < 0) {
throw new IOException("Duration must be positive; got: " + durationMin);
}
endTimeStampMs = startTimestampMs
+ TimeUnit.MILLISECONDS.convert(durationMin, TimeUnit.MINUTES);
fs = FileSystem.get(URI.create(namenodeURI), conf);
System.out.println("Start timestamp: " + startTimestampMs);
long currentEpoch = System.currentTimeMillis();
long delay = startTimestampMs - currentEpoch;
if (delay > 0) {
System.out.println("Sleeping for " + delay + " ms");
Thread.sleep(delay);
}
    String mapperSpecificPathPrefix = fileParentPath + "/mapper" + taskID;
    System.out.println("Mapper path prefix: " + mapperSpecificPathPrefix);
    long numFilesCreated = 0;
    Path path;
    final byte[] content = {0x0};
    while (System.currentTimeMillis() < endTimeStampMs) {
      path = new Path(mapperSpecificPathPrefix + "/file" + numFilesCreated);
OutputStream out = fs.create(path);
out.write(content);
out.close();
numFilesCreated++;
mapperContext.getCounter(CREATEFILECOUNTERS.NUMFILESCREATED)
.increment(1L);
if (numFilesCreated % 1000 == 0) {
mapperContext.progress();
System.out.println("Number of files created: " + numFilesCreated);
}
if (shouldDelete) {
fs.delete(path, true);
}
}
}
}

View File

@ -0,0 +1,57 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
/**
* An input format which does not read any input, but rather starts a
* configurable number of mappers and runs them for a configurable duration.
*/
public class VirtualInputFormat<K, V> extends FileInputFormat<K, V> {
  // Number of splits = number of mappers. Creates fake splits to launch
  // the required number of mappers.
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
Configuration conf = job.getConfiguration();
int numMappers = conf.getInt(CreateFileMapper.NUM_MAPPERS_KEY, -1);
if (numMappers == -1) {
throw new IOException("Number of mappers should be provided as input");
}
List<InputSplit> splits = new ArrayList<InputSplit>(numMappers);
for (int i = 0; i < numMappers; i++) {
splits.add(new VirtualInputSplit());
}
return splits;
}
@Override
public RecordReader<K, V> createRecordReader(InputSplit split,
TaskAttemptContext context) throws IOException, InterruptedException {
return new VirtualRecordReader<>();
}
}

View File

@ -0,0 +1,52 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputSplit;
/**
* A fake input split.
*/
public class VirtualInputSplit extends InputSplit implements Writable {
@Override
public void write(DataOutput out) throws IOException {
// do Nothing
}
@Override
public void readFields(DataInput in) throws IOException {
// do Nothing
}
@Override
public long getLength() throws IOException, InterruptedException {
return 0;
}
@Override
public String[] getLocations() throws IOException, InterruptedException {
return new String[] {};
}
}

View File

@ -0,0 +1,78 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
/**
 * A fake record reader which simply runs for a configured time duration.
*/
@SuppressWarnings("unchecked")
public class VirtualRecordReader<K, V> extends RecordReader<K, V> {
private int durationMs;
private long startTimestampInMs;
private long endTimestampInMs;
private int numRows = 1;
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
durationMs = conf.getInt(CreateFileMapper.DURATION_MIN_KEY, 0) * 60 * 1000;
    startTimestampInMs = conf.getLong(WorkloadDriver.START_TIMESTAMP_MS, 0);
endTimestampInMs = startTimestampInMs + durationMs;
}
// The map function per split should be invoked only once.
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
if (numRows > 0) {
numRows--;
return true;
} else {
return false;
}
}
@Override
public K getCurrentKey() throws IOException, InterruptedException {
return (K) NullWritable.get();
}
@Override
public V getCurrentValue() throws IOException, InterruptedException {
return (V) NullWritable.get();
}
@Override
public float getProgress() throws IOException, InterruptedException {
long remainingMs = endTimestampInMs - System.currentTimeMillis();
return (remainingMs * 100.0f) / durationMs;
}
@Override
public void close() throws IOException {
// do Nothing
}
};

View File

@ -0,0 +1,208 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator;
import org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditReplayMapper;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.concurrent.TimeUnit;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.OptionGroup;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This is the driver for generating generic workloads against a NameNode under
* test. It launches a map-only job with a mapper class specified by the
* {@value MAPPER_CLASS_NAME} argument. See the specific mappers (currently
* {@link AuditReplayMapper} and {@link CreateFileMapper}) for information on
* their specific behavior and parameters.
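 *
 * <p/>The driver can also be used programmatically. A minimal sketch, assuming
 * a CreateFileMapper workload (the NameNode URI and parameter values below are
 * hypothetical):
 * <pre>{@code
 * Configuration conf = new Configuration();
 * conf.setInt(CreateFileMapper.NUM_MAPPERS_KEY, 4);
 * conf.setInt(CreateFileMapper.DURATION_MIN_KEY, 10);
 * // start the workload one minute from now against the target NameNode
 * Job job = WorkloadDriver.getJobForSubmission(conf, "hdfs://namenode:9000/",
 *     System.currentTimeMillis() + 60 * 1000L, CreateFileMapper.class);
 * job.waitForCompletion(true);
 * }</pre>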
*/
public class WorkloadDriver extends Configured implements Tool {
private static final Logger LOG =
LoggerFactory.getLogger(WorkloadDriver.class);
public static final String START_TIMESTAMP_MS = "start_timestamp_ms";
public static final String START_TIME_OFFSET = "start_time_offset";
public static final String START_TIME_OFFSET_DEFAULT = "1m";
public static final String NN_URI = "nn_uri";
public static final String MAPPER_CLASS_NAME = "mapper_class_name";
public int run(String[] args) throws Exception {
Option helpOption = new Option("h", "help", false,
"Shows this message. Additionally specify the " + MAPPER_CLASS_NAME
+ " argument to show help for a specific mapper class.");
Options options = new Options();
options.addOption(helpOption);
options.addOption(OptionBuilder.withArgName("NN URI").hasArg()
.withDescription("URI of the NameNode under test").isRequired()
.create(NN_URI));
OptionGroup startTimeOptions = new OptionGroup();
startTimeOptions.addOption(OptionBuilder.withArgName("Start Timestamp")
.hasArg().withDescription("Mapper start UTC timestamp in ms")
.create(START_TIMESTAMP_MS));
startTimeOptions
.addOption(OptionBuilder.withArgName("Start Time Offset").hasArg()
.withDescription("Mapper start time as an offset from current "
+ "time. Human-readable formats accepted, e.g. 10m (default "
+ START_TIME_OFFSET_DEFAULT + ").")
.create(START_TIME_OFFSET));
options.addOptionGroup(startTimeOptions);
Option mapperClassOption = OptionBuilder.withArgName("Mapper ClassName")
.hasArg()
.withDescription("Class name of the mapper; must be a WorkloadMapper "
+ "subclass. Mappers supported currently: \n"
+ "1. AuditReplayMapper \n"
+ "2. CreateFileMapper \n"
            + "Fully qualified class names are also supported.")
.isRequired().create(MAPPER_CLASS_NAME);
options.addOption(mapperClassOption);
Options helpOptions = new Options();
helpOptions.addOption(helpOption);
Option mapperClassNotRequiredOption = (Option) mapperClassOption.clone();
mapperClassNotRequiredOption.setRequired(false);
helpOptions.addOption(mapperClassNotRequiredOption);
CommandLineParser parser = new PosixParser();
CommandLine cli = parser.parse(helpOptions, args, true);
if (cli.hasOption("h")) {
String footer = null;
if (cli.hasOption(MAPPER_CLASS_NAME)) {
footer = getMapperUsageInfo(cli.getOptionValue(MAPPER_CLASS_NAME));
}
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp(200, "./start-workload [options]", null, options,
footer);
return 1;
}
cli = parser.parse(options, args);
String nnURI = cli.getOptionValue(NN_URI);
long startTimestampMs;
if (cli.hasOption(START_TIMESTAMP_MS)) {
startTimestampMs = Long.parseLong(cli.getOptionValue(START_TIMESTAMP_MS));
} else {
// Leverage the human-readable time parsing capabilities of Configuration
String tmpConfKey = "___temp_config_property___";
Configuration tmpConf = new Configuration();
tmpConf.set(tmpConfKey,
cli.getOptionValue(START_TIME_OFFSET, START_TIME_OFFSET_DEFAULT));
startTimestampMs = tmpConf.getTimeDuration(tmpConfKey, 0,
TimeUnit.MILLISECONDS) + System.currentTimeMillis();
}
Class<? extends WorkloadMapper> mapperClass = getMapperClass(
cli.getOptionValue(MAPPER_CLASS_NAME));
if (!mapperClass.newInstance().verifyConfigurations(getConf())) {
System.err
.println(getMapperUsageInfo(cli.getOptionValue(MAPPER_CLASS_NAME)));
return 1;
}
Job job = getJobForSubmission(getConf(), nnURI, startTimestampMs,
mapperClass);
boolean success = job.waitForCompletion(true);
return success ? 0 : 1;
}
public static Job getJobForSubmission(Configuration baseConf, String nnURI,
long startTimestampMs, Class<? extends WorkloadMapper> mapperClass)
throws IOException, ClassNotFoundException, InstantiationException,
IllegalAccessException {
Configuration conf = new Configuration(baseConf);
conf.set(NN_URI, nnURI);
conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
String startTimeString = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss z")
.format(new Date(startTimestampMs));
LOG.info("The workload will start at " + startTimestampMs + " ms ("
+ startTimeString + ")");
conf.setLong(START_TIMESTAMP_MS, startTimestampMs);
Job job = Job.getInstance(conf, "Dynamometer Workload Driver");
job.setOutputFormatClass(NullOutputFormat.class);
job.setJarByClass(mapperClass);
job.setMapperClass(mapperClass);
job.setInputFormatClass(mapperClass.newInstance().getInputFormat(conf));
job.setOutputFormatClass(NullOutputFormat.class);
job.setNumReduceTasks(0);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
return job;
}
public static void main(String[] args) throws Exception {
WorkloadDriver driver = new WorkloadDriver();
System.exit(ToolRunner.run(driver, args));
}
private Class<? extends WorkloadMapper> getMapperClass(String className)
throws ClassNotFoundException {
if (!className.contains(".")) {
className = WorkloadDriver.class.getPackage().getName() + "." + className;
}
Class<?> mapperClass = getConf().getClassByName(className);
if (!WorkloadMapper.class.isAssignableFrom(mapperClass)) {
throw new IllegalArgumentException(className + " is not a subclass of "
+ WorkloadMapper.class.getCanonicalName());
}
return (Class<? extends WorkloadMapper>) mapperClass;
}
private String getMapperUsageInfo(String mapperClassName)
throws ClassNotFoundException, InstantiationException,
IllegalAccessException {
WorkloadMapper<?, ?> mapper = getMapperClass(mapperClassName).newInstance();
StringBuilder builder = new StringBuilder("Usage for ");
builder.append(mapper.getClass().getSimpleName());
builder.append(":\n");
builder.append(mapper.getDescription());
for (String configDescription : mapper.getConfigDescriptions()) {
builder.append("\n ");
builder.append(configDescription);
}
builder.append("\nConfiguration parameters can be set at the ");
builder.append("_start_ of the argument list like:\n");
builder.append(" -Dconfiguration.key=configurationValue");
return builder.toString();
}
}

View File

@ -0,0 +1,59 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Mapper;
/**
* Represents the base class for a generic workload-generating mapper. By
* default, it will expect to use {@link VirtualInputFormat} as its
* {@link InputFormat}. Subclasses expecting a different {@link InputFormat}
* should override the {@link #getInputFormat(Configuration)} method.
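 *
 * <p/>A minimal subclass might look like the following sketch (a hypothetical
 * mapper; the real implementations are {@link CreateFileMapper} and
 * AuditReplayMapper):
 * <pre>{@code
 * public class NoOpMapper extends WorkloadMapper<NullWritable, NullWritable> {
 *   public String getDescription() {
 *     return "Runs for the configured duration without issuing requests.";
 *   }
 *   public List<String> getConfigDescriptions() {
 *     return Collections.emptyList();
 *   }
 *   public boolean verifyConfigurations(Configuration conf) {
 *     return true;
 *   }
 * }
 * }</pre>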
*/
public abstract class WorkloadMapper<KEYIN, VALUEIN>
extends Mapper<KEYIN, VALUEIN, NullWritable, NullWritable> {
/**
* Return the input class to be used by this mapper.
*/
public Class<? extends InputFormat> getInputFormat(Configuration conf) {
return VirtualInputFormat.class;
}
/**
* Get the description of the behavior of this mapper.
*/
public abstract String getDescription();
/**
* Get a list of the description of each configuration that this mapper
* accepts.
*/
public abstract List<String> getConfigDescriptions();
/**
* Verify that the provided configuration contains all configurations required
* by this mapper.
*/
public abstract boolean verifyConfigurations(Configuration conf);
}

View File

@ -0,0 +1,57 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit;
import java.io.IOException;
import java.util.function.Function;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
/**
 * This interface represents a pluggable command parser. It accepts one line
 * of {@link Text} input at a time and returns an {@link AuditReplayCommand}
 * which represents the input text. Each input line should produce exactly one
* command.
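 *
 * <p/>A rough usage sketch (the configuration, input line, and replay start
 * time below are hypothetical; {@link AuditReplayMapper} performs the actual
 * wiring):
 * <pre>{@code
 * AuditCommandParser parser = new AuditLogDirectParser();
 * parser.initialize(conf);
 * // shift relative offsets onto the absolute replay start time
 * AuditReplayCommand command =
 *     parser.parse(line, relativeMs -> replayStartMs + relativeMs);
 * }</pre>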
*/
public interface AuditCommandParser {
/**
* Initialize this parser with the given configuration. Guaranteed to be
* called prior to any calls to {@link #parse(Text, Function)}.
*
* @param conf The Configuration to be used to set up this parser.
*/
void initialize(Configuration conf) throws IOException;
/**
* Convert a line of input into an {@link AuditReplayCommand}. Since
* {@link AuditReplayCommand}s store absolute timestamps, relativeToAbsolute
* can be used to convert relative timestamps (i.e., milliseconds elapsed
* between the start of the audit log and this command) into absolute
* timestamps.
*
* @param inputLine Single input line to convert.
* @param relativeToAbsolute Function converting relative timestamps
* (in milliseconds) to absolute timestamps
* (in milliseconds).
* @return A command representing the input line.
*/
AuditReplayCommand parse(Text inputLine,
Function<Long, Long> relativeToAbsolute) throws IOException;
}

View File

@ -0,0 +1,145 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit;
import com.google.common.base.Splitter;
import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Map;
import java.util.TimeZone;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
/**
* This {@link AuditCommandParser} reads commands from an audit log in the
* original format that audit logs are produced in under a standard Hadoop
* configuration. It requires the {@value AUDIT_START_TIMESTAMP_KEY}
* configuration to be set to the start time of the audit log; this is used to
* determine when each event occurred relative to that start time.
*
* <p/>By default, this assumes that the audit log is in the default log format
* set up by Hadoop, like:
* <pre>{@code
* 1970-01-01 00:00:00,000 INFO FSNamesystem.audit: allowed=true ...
* }</pre>
* You can adjust this parsing behavior using the various configurations
* available.
*/
public class AuditLogDirectParser implements AuditCommandParser {
/** See class Javadoc for more detail. */
public static final String AUDIT_START_TIMESTAMP_KEY =
"auditreplay.log-start-time.ms";
/**
* The format string used to parse the date which is present in the audit
* log. This must be a format understood by {@link SimpleDateFormat}.
*/
public static final String AUDIT_LOG_DATE_FORMAT_KEY =
"auditreplay.log-date.format";
public static final String AUDIT_LOG_DATE_FORMAT_DEFAULT =
"yyyy-MM-dd HH:mm:ss,SSS";
/**
* The time zone to use when parsing the audit log timestamp. This must
* be a format recognized by {@link TimeZone#getTimeZone(String)}.
*/
public static final String AUDIT_LOG_DATE_TIME_ZONE_KEY =
"auditreplay.log-date.time-zone";
public static final String AUDIT_LOG_DATE_TIME_ZONE_DEFAULT = "UTC";
/**
* The regex to use when parsing the audit log lines. This should match
* against a single log line, and create two named capture groups. One
* must be titled "timestamp" and return a timestamp which can be parsed
* by a {@link DateFormat date formatter}. The other must be titled "message"
* and return the audit content, such as "allowed=true ugi=user ...". See
* {@link #AUDIT_LOG_PARSE_REGEX_DEFAULT} for an example.
*/
public static final String AUDIT_LOG_PARSE_REGEX_KEY =
"auditreplay.log-parse-regex";
public static final String AUDIT_LOG_PARSE_REGEX_DEFAULT =
"^(?<timestamp>.+?) INFO [^:]+: (?<message>.+)$";
private static final Splitter.MapSplitter AUDIT_SPLITTER = Splitter.on("\t")
.trimResults().omitEmptyStrings().withKeyValueSeparator("=");
private static final Splitter SPACE_SPLITTER = Splitter.on(" ").trimResults()
.omitEmptyStrings();
private long startTimestamp;
private DateFormat dateFormat;
private Pattern logLineParseRegex;
@Override
public void initialize(Configuration conf) throws IOException {
startTimestamp = conf.getLong(AUDIT_START_TIMESTAMP_KEY, -1);
if (startTimestamp < 0) {
throw new IOException(
"Invalid or missing audit start timestamp: " + startTimestamp);
}
dateFormat = new SimpleDateFormat(conf.get(AUDIT_LOG_DATE_FORMAT_KEY,
AUDIT_LOG_DATE_FORMAT_DEFAULT));
String timeZoneString = conf.get(AUDIT_LOG_DATE_TIME_ZONE_KEY,
AUDIT_LOG_DATE_TIME_ZONE_DEFAULT);
dateFormat.setTimeZone(TimeZone.getTimeZone(timeZoneString));
String logLineParseRegexString =
conf.get(AUDIT_LOG_PARSE_REGEX_KEY, AUDIT_LOG_PARSE_REGEX_DEFAULT);
if (!logLineParseRegexString.contains("(?<timestamp>")
|| !logLineParseRegexString.contains("(?<message>")) {
throw new IllegalArgumentException("Must configure regex with named "
+ "capture groups 'timestamp' and 'message'");
}
logLineParseRegex = Pattern.compile(logLineParseRegexString);
}
@Override
public AuditReplayCommand parse(Text inputLine,
Function<Long, Long> relativeToAbsolute) throws IOException {
Matcher m = logLineParseRegex.matcher(inputLine.toString());
if (!m.find()) {
throw new IOException(
"Unable to find valid message pattern from audit log line: `"
+ inputLine + "` using regex `" + logLineParseRegex + "`");
}
long relativeTimestamp;
try {
relativeTimestamp = dateFormat.parse(m.group("timestamp")).getTime()
- startTimestamp;
} catch (ParseException p) {
throw new IOException("Exception while parsing timestamp from audit log",
p);
}
// Sanitize the = in the rename options field into a : so we can split on =
String auditMessageSanitized =
m.group("message").replace("(options=", "(options:");
Map<String, String> parameterMap = AUDIT_SPLITTER
.split(auditMessageSanitized);
return new AuditReplayCommand(relativeToAbsolute.apply(relativeTimestamp),
// Split the UGI on space to remove the auth and proxy portions of it
SPACE_SPLITTER.split(parameterMap.get("ugi")).iterator().next(),
parameterMap.get("cmd").replace("(options:", "(options="),
parameterMap.get("src"), parameterMap.get("dst"),
parameterMap.get("ip"));
}
}
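To make the configuration surface above concrete, the following sketch parses a single raw audit line with this parser; the sample line, the zero start timestamp, and the DirectParserExample class name are assumptions for illustration only.

package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit;

import java.util.function.Function;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;

/** Hypothetical driver showing the parser's configuration and output. */
public final class DirectParserExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assume the audit log began at the epoch so relative offsets equal log time.
    conf.setLong(AuditLogDirectParser.AUDIT_START_TIMESTAMP_KEY, 0L);

    AuditLogDirectParser parser = new AuditLogDirectParser();
    parser.initialize(conf);

    Text line = new Text("1970-01-01 00:00:01,000 INFO FSNamesystem.audit: "
        + "allowed=true\tugi=hdfs\tip=/0.0.0.0\tcmd=mkdirs\tsrc=/tmp/dir\t"
        + "dst=null\tperm=null\tproto=rpc");
    // Function.identity() keeps the relative timestamp (1000 ms into the log).
    System.out.println(parser.parse(line, Function.identity()));
  }
}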

View File

@ -0,0 +1,70 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit;
import java.io.IOException;
import java.util.function.Function;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
/**
* This {@link AuditCommandParser} is used to read commands assuming that the
* input was generated by a Hive query storing uncompressed output files, in
* which fields should be separated by the start-of-heading (U+0001) character.
* The fields available should be, in order:
*
* <pre>
* relativeTimestampMs,ugi,command,src,dest,sourceIP
* </pre>
*
* Where relativeTimestampMs represents the time elapsed between the start of
* the audit log and the occurrence of the audit event. Assuming your audit logs
* are available in Hive, this can be generated with a query looking like:
*
* <pre>
* INSERT OVERWRITE DIRECTORY '${outputPath}'
* SELECT (timestamp - ${startTime}) AS relTime, ugi, cmd, src, dst, ip
* FROM '${auditLogTableLocation}'
* WHERE timestamp >= ${startTime} AND timestamp < ${endTime}
* DISTRIBUTE BY src
* SORT BY relTime ASC;
* </pre>
*
* Note that the sorting step is important; events in each distinct file must be
* in time-ascending order.
*/
public class AuditLogHiveTableParser implements AuditCommandParser {
private static final String FIELD_SEPARATOR = "\u0001";
@Override
public void initialize(Configuration conf) throws IOException {
// Nothing to be done
}
@Override
public AuditReplayCommand parse(Text inputLine,
Function<Long, Long> relativeToAbsolute) throws IOException {
String[] fields = inputLine.toString().split(FIELD_SEPARATOR);
long absoluteTimestamp = relativeToAbsolute
.apply(Long.parseLong(fields[0]));
return new AuditReplayCommand(absoluteTimestamp, fields[1], fields[2],
fields[3], fields[4], fields[5]);
}
}
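A short, hedged example of feeding one Hive-exported line (fields joined by U+0001) through this parser; the field values, the relative-to-absolute mapping, and the HiveParserExample class are invented for illustration.

package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit;

import java.util.function.Function;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;

/** Hypothetical driver feeding one Hive-exported line through the parser. */
public final class HiveParserExample {
  public static void main(String[] args) throws Exception {
    AuditLogHiveTableParser parser = new AuditLogHiveTableParser();
    parser.initialize(new Configuration());

    // relativeTimestampMs, ugi, command, src, dest, sourceIP joined by U+0001.
    Text line = new Text("10\u0001hdfs\u0001mkdirs\u0001/tmp/dir\u0001null\u00010.0.0.0");

    // Shift relative offsets onto a replay window that starts "now".
    long replayStartMs = System.currentTimeMillis();
    Function<Long, Long> relativeToAbsolute = relative -> replayStartMs + relative;
    System.out.println(parser.parse(line, relativeToAbsolute));
  }
}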

View File

@ -0,0 +1,163 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit;
import java.io.IOException;
import java.util.Objects;
import java.util.concurrent.Delayed;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class represents a single command to be replayed by the workload
* generator. It implements the {@link Delayed} interface so that commands can
* be fetched in timestamp order from a {@link java.util.concurrent.DelayQueue}.
* The {@link #getPoisonPill(long)} method returns a "poison pill"
* {@link AuditReplayCommand} for which {@link #isPoison()} is true, signaling
* to consumers of the {@link java.util.concurrent.DelayQueue} that they should
* stop processing further items and terminate themselves.
*/
class AuditReplayCommand implements Delayed {
private static final Logger LOG = LoggerFactory
.getLogger(AuditReplayCommand.class);
private static final Pattern SIMPLE_UGI_PATTERN = Pattern
.compile("([^/@ ]*).*?");
private long absoluteTimestamp;
private String ugi;
private String command;
private String src;
private String dest;
private String sourceIP;
AuditReplayCommand(long absoluteTimestamp, String ugi, String command,
String src, String dest, String sourceIP) {
this.absoluteTimestamp = absoluteTimestamp;
this.ugi = ugi;
this.command = command;
this.src = src;
this.dest = dest;
this.sourceIP = sourceIP;
}
long getAbsoluteTimestamp() {
return absoluteTimestamp;
}
String getSimpleUgi() {
Matcher m = SIMPLE_UGI_PATTERN.matcher(ugi);
if (m.matches()) {
return m.group(1);
} else {
LOG.error("Error parsing simple UGI <{}>; falling back to current user",
ugi);
try {
return UserGroupInformation.getCurrentUser().getShortUserName();
} catch (IOException ioe) {
return "";
}
}
}
String getCommand() {
return command;
}
String getSrc() {
return src;
}
String getDest() {
return dest;
}
String getSourceIP() {
return sourceIP;
}
@Override
public long getDelay(TimeUnit unit) {
return unit.convert(absoluteTimestamp - System.currentTimeMillis(),
TimeUnit.MILLISECONDS);
}
@Override
public int compareTo(Delayed o) {
return Long.compare(absoluteTimestamp,
((AuditReplayCommand) o).absoluteTimestamp);
}
/**
* If true, the thread which consumes this item should not process any further
* items and instead simply terminate itself.
*/
boolean isPoison() {
return false;
}
/**
* A command representing a Poison Pill, indicating that the processing thread
* should not process any further items and instead should terminate itself.
* Always returns true for {@link #isPoison()}. It does not contain any other
* information besides a timestamp; other getter methods will return null.
*/
private static final class PoisonPillCommand extends AuditReplayCommand {
private PoisonPillCommand(long absoluteTimestamp) {
super(absoluteTimestamp, null, null, null, null, null);
}
@Override
boolean isPoison() {
return true;
}
}
static AuditReplayCommand getPoisonPill(long absoluteTimestamp) {
return new PoisonPillCommand(absoluteTimestamp);
}
@Override
public boolean equals(Object other) {
if (!(other instanceof AuditReplayCommand)) {
return false;
}
AuditReplayCommand o = (AuditReplayCommand) other;
return absoluteTimestamp == o.absoluteTimestamp && ugi.equals(o.ugi)
&& command.equals(o.command) && src.equals(o.src) && dest.equals(o.dest)
&& sourceIP.equals(o.sourceIP);
}
@Override
public int hashCode() {
return Objects.hash(absoluteTimestamp, ugi, command, src, dest, sourceIP);
}
@Override
public String toString() {
return String.format("AuditReplayCommand(absoluteTimestamp=%d, ugi=%s, "
+ "command=%s, src=%s, dest=%s, sourceIP=%s",
absoluteTimestamp, ugi, command, src, dest, sourceIP);
}
}
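The following is a hedged, standalone sketch of the DelayQueue/poison-pill pattern this class supports, outside of any MapReduce context; the commands, timestamps, and the PoisonPillExample class are invented for demonstration.

package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit;

import java.util.concurrent.DelayQueue;

/** Hypothetical standalone demo of the DelayQueue/poison-pill pattern. */
public final class PoisonPillExample {
  public static void main(String[] args) throws InterruptedException {
    DelayQueue<AuditReplayCommand> queue = new DelayQueue<>();
    long now = System.currentTimeMillis();
    // Two invented commands scheduled a fraction of a second into the future.
    queue.put(new AuditReplayCommand(now + 100, "hdfs", "mkdirs", "/tmp/a", "null", "0.0.0.0"));
    queue.put(new AuditReplayCommand(now + 200, "hdfs", "delete", "/tmp/a", "null", "0.0.0.0"));
    // The poison pill is scheduled after the last real command.
    queue.put(AuditReplayCommand.getPoisonPill(now + 300));

    // take() blocks until each command's timestamp has been reached.
    AuditReplayCommand cmd = queue.take();
    while (!cmd.isPoison()) {
      System.out.println("replaying " + cmd);
      cmd = queue.take();
    }
  }
}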

View File

@ -0,0 +1,290 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit;
import com.google.common.collect.Lists;
import java.util.Optional;
import java.util.function.Function;
import org.apache.hadoop.tools.dynamometer.workloadgenerator.WorkloadDriver;
import org.apache.hadoop.tools.dynamometer.workloadgenerator.WorkloadMapper;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditReplayMapper.CommandType.READ;
import static org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditReplayMapper.CommandType.WRITE;
/**
* AuditReplayMapper replays the given audit trace against the NameNode under
* test. Each mapper spawns a number of threads equal to the
* {@value NUM_THREADS_KEY} configuration (by default
* {@value NUM_THREADS_DEFAULT}) to use for replaying. Each mapper reads a
* single input file which will be consumed by all of the available threads. A
* {@link FileInputFormat} with splitting disabled is used so any files present
* in the input path directory (given by the {@value INPUT_PATH_KEY}
* configuration) will be used as input; one file per mapper. The expected
* format of these files is determined by the value of the
* {@value COMMAND_PARSER_KEY} configuration, which defaults to
* {@link AuditLogDirectParser}.
*
* <p/>This generates a number of {@link org.apache.hadoop.mapreduce.Counter}
* values which can be used to get information about the replay, including the
* number of commands replayed, how many of them were "invalid" (threw an
* exception), how many were "late" (replayed later than they should have been),
* and the latency (from client perspective) of each command. If there are a
* large number of "late" commands, you likely need to increase the number of
* threads used and/or the number of mappers.
*
* <p/>By default, commands will be replayed at the same rate as they were
* originally performed. However a rate factor can be specified via the
* {@value RATE_FACTOR_KEY} configuration; all of the (relative) timestamps will
* be divided by this rate factor, effectively changing the rate at which they
* are replayed. For example, a rate factor of 2 would make the replay occur
* twice as fast, and a rate factor of 0.5 would make it occur half as fast.
*/
public class AuditReplayMapper extends WorkloadMapper<LongWritable, Text> {
public static final String INPUT_PATH_KEY = "auditreplay.input-path";
public static final String NUM_THREADS_KEY = "auditreplay.num-threads";
public static final int NUM_THREADS_DEFAULT = 1;
public static final String CREATE_BLOCKS_KEY = "auditreplay.create-blocks";
public static final boolean CREATE_BLOCKS_DEFAULT = true;
public static final String RATE_FACTOR_KEY = "auditreplay.rate-factor";
public static final double RATE_FACTOR_DEFAULT = 1.0;
public static final String COMMAND_PARSER_KEY =
"auditreplay.command-parser.class";
public static final Class<AuditLogDirectParser> COMMAND_PARSER_DEFAULT =
AuditLogDirectParser.class;
private static final Logger LOG =
LoggerFactory.getLogger(AuditReplayMapper.class);
// This is the maximum amount that the mapper should read ahead of the replay
// time when consuming its input. Setting this to one minute avoids reading too
// many entries into memory at once while ensuring that the replay threads
// should never run out of entries to replay.
private static final long MAX_READAHEAD_MS = 60000;
public static final String INDIVIDUAL_COMMANDS_COUNTER_GROUP =
"INDIVIDUAL_COMMANDS";
public static final String INDIVIDUAL_COMMANDS_LATENCY_SUFFIX = "_LATENCY";
public static final String INDIVIDUAL_COMMANDS_INVALID_SUFFIX = "_INVALID";
public static final String INDIVIDUAL_COMMANDS_COUNT_SUFFIX = "_COUNT";
/** {@link org.apache.hadoop.mapreduce.Counter} definitions for replay. */
public enum REPLAYCOUNTERS {
// Total number of commands that were replayed
TOTALCOMMANDS,
// Total number of commands that returned an error during replay (incl
// unsupported)
TOTALINVALIDCOMMANDS,
// Total number of commands that are unsupported for replay
TOTALUNSUPPORTEDCOMMANDS,
// Total number of commands that were performed later than they should have
// been
LATECOMMANDS,
// Total delay time of all commands that were performed later than they
// should have been
LATECOMMANDSTOTALTIME,
// Total number of write operations
TOTALWRITECOMMANDS,
// Total latency for all write operations
TOTALWRITECOMMANDLATENCY,
// Total number of read operations
TOTALREADCOMMANDS,
// Total latency for all read operations
TOTALREADCOMMANDLATENCY
}
/** Definitions of the various commands that can be replayed. */
public enum ReplayCommand {
APPEND(WRITE),
CREATE(WRITE),
GETFILEINFO(READ),
CONTENTSUMMARY(READ),
MKDIRS(WRITE),
RENAME(WRITE),
LISTSTATUS(READ),
DELETE(WRITE),
OPEN(READ),
SETPERMISSION(WRITE),
SETOWNER(WRITE),
SETTIMES(WRITE),
SETREPLICATION(WRITE),
CONCAT(WRITE);
private final CommandType type;
ReplayCommand(CommandType type) {
this.type = type;
}
public CommandType getType() {
return type;
}
}
/** Define the type of command, either read or write. */
public enum CommandType {
READ, WRITE
}
private long startTimestampMs;
private int numThreads;
private double rateFactor;
private long highestTimestamp;
private List<AuditReplayThread> threads;
private DelayQueue<AuditReplayCommand> commandQueue;
private Function<Long, Long> relativeToAbsoluteTimestamp;
private AuditCommandParser commandParser;
private ScheduledThreadPoolExecutor progressExecutor;
@Override
public Class<? extends InputFormat> getInputFormat(Configuration conf) {
return NoSplitTextInputFormat.class;
}
@Override
public String getDescription() {
return "This mapper replays audit log files.";
}
@Override
public List<String> getConfigDescriptions() {
return Lists.newArrayList(
INPUT_PATH_KEY
+ " (required): Path to directory containing input files.",
NUM_THREADS_KEY + " (default " + NUM_THREADS_DEFAULT
+ "): Number of threads to use per mapper for replay.",
CREATE_BLOCKS_KEY + " (default " + CREATE_BLOCKS_DEFAULT
+ "): Whether or not to create 1-byte blocks when "
+ "performing `create` commands.",
RATE_FACTOR_KEY + " (default " + RATE_FACTOR_DEFAULT
+ "): Multiplicative speed at which to replay the audit "
+ "log; e.g. a value of 2.0 would make the replay occur at "
+ "twice the original speed. This can be useful "
+ "to induce heavier loads.");
}
@Override
public boolean verifyConfigurations(Configuration conf) {
return conf.get(INPUT_PATH_KEY) != null;
}
@Override
public void setup(final Mapper.Context context) throws IOException {
Configuration conf = context.getConfiguration();
// WorkloadDriver ensures that the start timestamp is set
startTimestampMs = conf.getLong(WorkloadDriver.START_TIMESTAMP_MS, -1);
numThreads = conf.getInt(NUM_THREADS_KEY, NUM_THREADS_DEFAULT);
rateFactor = conf.getDouble(RATE_FACTOR_KEY, RATE_FACTOR_DEFAULT);
try {
commandParser = conf.getClass(COMMAND_PARSER_KEY, COMMAND_PARSER_DEFAULT,
AuditCommandParser.class).getConstructor().newInstance();
} catch (NoSuchMethodException | InstantiationException
| IllegalAccessException | InvocationTargetException e) {
throw new IOException(
"Exception encountered while instantiating the command parser", e);
}
commandParser.initialize(conf);
relativeToAbsoluteTimestamp =
(input) -> startTimestampMs + Math.round(input / rateFactor);
LOG.info("Starting " + numThreads + " threads");
progressExecutor = new ScheduledThreadPoolExecutor(1);
// half of the timeout or once per minute if none specified
long progressFrequencyMs = conf.getLong(MRJobConfig.TASK_TIMEOUT,
2 * 60 * 1000) / 2;
progressExecutor.scheduleAtFixedRate(context::progress,
progressFrequencyMs, progressFrequencyMs, TimeUnit.MILLISECONDS);
threads = new ArrayList<>();
ConcurrentMap<String, FileSystem> fsCache = new ConcurrentHashMap<>();
commandQueue = new DelayQueue<>();
for (int i = 0; i < numThreads; i++) {
AuditReplayThread thread = new AuditReplayThread(context, commandQueue,
fsCache);
threads.add(thread);
thread.start();
}
}
@Override
public void map(LongWritable lineNum, Text inputLine, Mapper.Context context)
throws IOException, InterruptedException {
AuditReplayCommand cmd = commandParser.parse(inputLine,
relativeToAbsoluteTimestamp);
long delay = cmd.getDelay(TimeUnit.MILLISECONDS);
// Prevent from loading too many elements into memory all at once
if (delay > MAX_READAHEAD_MS) {
Thread.sleep(delay - (MAX_READAHEAD_MS / 2));
}
commandQueue.put(cmd);
highestTimestamp = cmd.getAbsoluteTimestamp();
}
@Override
public void cleanup(Mapper.Context context) throws InterruptedException {
for (AuditReplayThread t : threads) {
// Add in an indicator for each thread to shut down after the last real
// command
t.addToQueue(AuditReplayCommand.getPoisonPill(highestTimestamp + 1));
}
Optional<Exception> threadException = Optional.empty();
for (AuditReplayThread t : threads) {
t.join();
t.drainCounters(context);
if (t.getException() != null) {
threadException = Optional.of(t.getException());
}
}
progressExecutor.shutdown();
if (threadException.isPresent()) {
throw new RuntimeException("Exception in AuditReplayThread",
threadException.get());
}
LOG.info("Time taken to replay the logs in ms: "
+ (System.currentTimeMillis() - startTimestampMs));
long totalCommands = context.getCounter(REPLAYCOUNTERS.TOTALCOMMANDS)
.getValue();
if (totalCommands != 0) {
double percentageOfInvalidOps =
context.getCounter(REPLAYCOUNTERS.TOTALINVALIDCOMMANDS).getValue()
* 100.0 / totalCommands;
LOG.info("Percentage of invalid ops: " + percentageOfInvalidOps);
}
}
}
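Tying the configuration keys above together, here is a hedged sketch of submitting a replay job programmatically, mirroring the WorkloadDriver.getJobForSubmission pattern used in TestWorkloadGenerator later in this patch; the ReplayJobExample class, the NameNode URI, and the input path are placeholders.

package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.tools.dynamometer.workloadgenerator.WorkloadDriver;

/** Hypothetical launcher; the NameNode URI and input path are placeholders. */
public final class ReplayJobExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set(AuditReplayMapper.INPUT_PATH_KEY, "hdfs:///dyno/audit-input");
    conf.setInt(AuditReplayMapper.NUM_THREADS_KEY, 4);
    conf.setDouble(AuditReplayMapper.RATE_FACTOR_KEY, 1.0);
    // Required by the default AuditLogDirectParser; the value is a placeholder.
    conf.setLong(AuditLogDirectParser.AUDIT_START_TIMESTAMP_KEY, 0L);

    // Give the mappers some lead time before replay begins.
    long startTimestampMs = System.currentTimeMillis() + 60_000L;
    Job job = WorkloadDriver.getJobForSubmission(conf,
        "hdfs://dyno-namenode:9000", startTimestampMs, AuditReplayMapper.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}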

View File

@ -0,0 +1,310 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit;
import com.google.common.base.Splitter;
import org.apache.hadoop.tools.dynamometer.workloadgenerator.WorkloadDriver;
import java.io.IOException;
import java.net.URI;
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.TimeUnit;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.counters.GenericCounter;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditReplayMapper.REPLAYCOUNTERS;
import org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditReplayMapper.ReplayCommand;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditReplayMapper.INDIVIDUAL_COMMANDS_COUNTER_GROUP;
import static org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditReplayMapper.INDIVIDUAL_COMMANDS_COUNT_SUFFIX;
import static org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditReplayMapper.INDIVIDUAL_COMMANDS_INVALID_SUFFIX;
import static org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditReplayMapper.INDIVIDUAL_COMMANDS_LATENCY_SUFFIX;
/**
* This class replays each audit log entry at a specified timestamp in the
* future. Each of these threads maintains a {@link DelayQueue} into which items
* are inserted by the {@link AuditReplayMapper}. Once an item is ready, this
* thread will fetch the command from the queue and attempt to replay it.
*/
public class AuditReplayThread extends Thread {
private static final Logger LOG =
LoggerFactory.getLogger(AuditReplayThread.class);
private DelayQueue<AuditReplayCommand> commandQueue;
private ConcurrentMap<String, FileSystem> fsCache;
private URI namenodeUri;
private UserGroupInformation loginUser;
private Configuration mapperConf;
// If any exception is encountered it will be stored here
private Exception exception;
private long startTimestampMs;
private boolean createBlocks;
// Counters are not thread-safe so we store a local mapping in our thread
// and merge them all together at the end.
private Map<REPLAYCOUNTERS, Counter> replayCountersMap = new HashMap<>();
private Map<String, Counter> individualCommandsMap = new HashMap<>();
AuditReplayThread(Mapper.Context mapperContext,
DelayQueue<AuditReplayCommand> queue,
ConcurrentMap<String, FileSystem> fsCache) throws IOException {
commandQueue = queue;
this.fsCache = fsCache;
loginUser = UserGroupInformation.getLoginUser();
mapperConf = mapperContext.getConfiguration();
namenodeUri = URI.create(mapperConf.get(WorkloadDriver.NN_URI));
startTimestampMs = mapperConf.getLong(WorkloadDriver.START_TIMESTAMP_MS,
-1);
createBlocks = mapperConf.getBoolean(AuditReplayMapper.CREATE_BLOCKS_KEY,
AuditReplayMapper.CREATE_BLOCKS_DEFAULT);
LOG.info("Start timestamp: " + startTimestampMs);
for (REPLAYCOUNTERS rc : REPLAYCOUNTERS.values()) {
replayCountersMap.put(rc, new GenericCounter());
}
for (ReplayCommand replayCommand : ReplayCommand.values()) {
individualCommandsMap.put(
replayCommand + INDIVIDUAL_COMMANDS_COUNT_SUFFIX,
new GenericCounter());
individualCommandsMap.put(
replayCommand + INDIVIDUAL_COMMANDS_LATENCY_SUFFIX,
new GenericCounter());
individualCommandsMap.put(
replayCommand + INDIVIDUAL_COMMANDS_INVALID_SUFFIX,
new GenericCounter());
}
}
/**
* Merge all of this thread's counter values into the counters contained
* within the passed context.
*
* @param context The context holding the counters to increment.
*/
void drainCounters(Mapper.Context context) {
for (Map.Entry<REPLAYCOUNTERS, Counter> ent : replayCountersMap
.entrySet()) {
context.getCounter(ent.getKey()).increment(ent.getValue().getValue());
}
for (Map.Entry<String, Counter> ent : individualCommandsMap.entrySet()) {
context.getCounter(INDIVIDUAL_COMMANDS_COUNTER_GROUP, ent.getKey())
.increment(ent.getValue().getValue());
}
}
/**
* Add a command to this thread's processing queue.
*
* @param cmd Command to add.
*/
void addToQueue(AuditReplayCommand cmd) {
commandQueue.put(cmd);
}
/**
* Get the Exception that caused this thread to stop running, if any, else
* null. Should not be called until this thread has already completed (i.e.,
* after {@link #join()} has been called).
*
* @return The exception which was thrown, if any.
*/
Exception getException() {
return exception;
}
@Override
public void run() {
long currentEpoch = System.currentTimeMillis();
long delay = startTimestampMs - currentEpoch;
try {
if (delay > 0) {
LOG.info("Sleeping for " + delay + " ms");
Thread.sleep(delay);
} else {
LOG.warn("Starting late by " + (-1 * delay) + " ms");
}
AuditReplayCommand cmd = commandQueue.take();
while (!cmd.isPoison()) {
replayCountersMap.get(REPLAYCOUNTERS.TOTALCOMMANDS).increment(1);
delay = cmd.getDelay(TimeUnit.MILLISECONDS);
if (delay < -5) { // allow some tolerance here
replayCountersMap.get(REPLAYCOUNTERS.LATECOMMANDS).increment(1);
replayCountersMap.get(REPLAYCOUNTERS.LATECOMMANDSTOTALTIME)
.increment(-1 * delay);
}
if (!replayLog(cmd)) {
replayCountersMap.get(REPLAYCOUNTERS.TOTALINVALIDCOMMANDS)
.increment(1);
}
cmd = commandQueue.take();
}
} catch (InterruptedException e) {
LOG.error("Interrupted; exiting from thread.", e);
} catch (Exception e) {
exception = e;
LOG.error("ReplayThread encountered exception; exiting.", e);
}
}
/**
* Attempt to replay the provided command. Updates counters accordingly.
*
* @param command The command to replay
* @return True iff the command was successfully replayed (i.e., no exceptions
* were thrown).
*/
private boolean replayLog(final AuditReplayCommand command) {
final String src = command.getSrc();
final String dst = command.getDest();
FileSystem proxyFs = fsCache.get(command.getSimpleUgi());
if (proxyFs == null) {
UserGroupInformation ugi = UserGroupInformation
.createProxyUser(command.getSimpleUgi(), loginUser);
proxyFs = ugi.doAs((PrivilegedAction<FileSystem>) () -> {
try {
FileSystem fs = new DistributedFileSystem();
fs.initialize(namenodeUri, mapperConf);
return fs;
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
});
fsCache.put(command.getSimpleUgi(), proxyFs);
}
final FileSystem fs = proxyFs;
ReplayCommand replayCommand;
try {
replayCommand = ReplayCommand
.valueOf(command.getCommand().split(" ")[0].toUpperCase());
} catch (IllegalArgumentException iae) {
LOG.warn("Unsupported/invalid command: " + command);
replayCountersMap.get(REPLAYCOUNTERS.TOTALUNSUPPORTEDCOMMANDS)
.increment(1);
return false;
}
try {
long startTime = System.currentTimeMillis();
switch (replayCommand) {
case CREATE:
FSDataOutputStream fsDos = fs.create(new Path(src));
if (createBlocks) {
fsDos.writeByte(0);
}
fsDos.close();
break;
case GETFILEINFO:
fs.getFileStatus(new Path(src));
break;
case CONTENTSUMMARY:
fs.getContentSummary(new Path(src));
break;
case MKDIRS:
fs.mkdirs(new Path(src));
break;
case RENAME:
fs.rename(new Path(src), new Path(dst));
break;
case LISTSTATUS:
((DistributedFileSystem) fs).getClient().listPaths(src,
HdfsFileStatus.EMPTY_NAME);
break;
case APPEND:
fs.append(new Path(src));
return true;
case DELETE:
fs.delete(new Path(src), true);
break;
case OPEN:
fs.open(new Path(src)).close();
break;
case SETPERMISSION:
fs.setPermission(new Path(src), FsPermission.getDefault());
break;
case SETOWNER:
fs.setOwner(new Path(src),
UserGroupInformation.getCurrentUser().getShortUserName(),
UserGroupInformation.getCurrentUser().getPrimaryGroupName());
break;
case SETTIMES:
fs.setTimes(new Path(src), System.currentTimeMillis(),
System.currentTimeMillis());
break;
case SETREPLICATION:
fs.setReplication(new Path(src), (short) 1);
break;
case CONCAT:
// dst is like [path1, path2] - strip brackets and split on comma
String bareDst = dst.length() < 2 ? ""
: dst.substring(1, dst.length() - 1).trim();
List<Path> dsts = new ArrayList<>();
for (String s : Splitter.on(",").omitEmptyStrings().trimResults()
.split(bareDst)) {
dsts.add(new Path(s));
}
fs.concat(new Path(src), dsts.toArray(new Path[] {}));
break;
default:
throw new RuntimeException("Unexpected command: " + replayCommand);
}
long latency = System.currentTimeMillis() - startTime;
switch (replayCommand.getType()) {
case WRITE:
replayCountersMap.get(REPLAYCOUNTERS.TOTALWRITECOMMANDLATENCY)
.increment(latency);
replayCountersMap.get(REPLAYCOUNTERS.TOTALWRITECOMMANDS).increment(1);
break;
case READ:
replayCountersMap.get(REPLAYCOUNTERS.TOTALREADCOMMANDLATENCY)
.increment(latency);
replayCountersMap.get(REPLAYCOUNTERS.TOTALREADCOMMANDS).increment(1);
break;
default:
throw new RuntimeException("Unexpected command type: "
+ replayCommand.getType());
}
individualCommandsMap
.get(replayCommand + INDIVIDUAL_COMMANDS_LATENCY_SUFFIX)
.increment(latency);
individualCommandsMap
.get(replayCommand + INDIVIDUAL_COMMANDS_COUNT_SUFFIX).increment(1);
return true;
} catch (IOException e) {
LOG.debug("IOException: " + e.getLocalizedMessage());
individualCommandsMap
.get(replayCommand + INDIVIDUAL_COMMANDS_INVALID_SUFFIX).increment(1);
return false;
}
}
}

View File

@ -0,0 +1,47 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
/**
* A simple {@link TextInputFormat} that disables splitting of files. This is
* the {@link org.apache.hadoop.mapreduce.InputFormat} used by
* {@link AuditReplayMapper}.
*/
public class NoSplitTextInputFormat extends TextInputFormat {
@Override
public List<FileStatus> listStatus(JobContext context) throws IOException {
context.getConfiguration().set(FileInputFormat.INPUT_DIR,
context.getConfiguration().get(AuditReplayMapper.INPUT_PATH_KEY));
return super.listStatus(context);
}
@Override
public boolean isSplitable(JobContext context, Path file) {
return false;
}
}

View File

@ -0,0 +1,27 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This contains the tooling necessary to use audit logs as a way to generate
* a realistic workload against a Dynamometer-simulated NameNode.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

View File

@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This contains the tooling necessary to generate a realistic workload,
* for example collected from audit logs, against a Dynamometer-simulated
* NameNode.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
package org.apache.hadoop.tools.dynamometer.workloadgenerator;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

View File

@ -0,0 +1,136 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator;
import org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditCommandParser;
import org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditLogDirectParser;
import org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditLogHiveTableParser;
import org.apache.hadoop.tools.dynamometer.workloadgenerator.audit.AuditReplayMapper;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.AuthorizationException;
import org.apache.hadoop.security.authorize.ImpersonationProvider;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_IMPERSONATION_PROVIDER_CLASS;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
/** Tests for {@link WorkloadDriver} and related classes. */
public class TestWorkloadGenerator {
private Configuration conf;
private MiniDFSCluster miniCluster;
private FileSystem dfs;
@Before
public void setup() throws Exception {
conf = new Configuration();
conf.setClass(HADOOP_SECURITY_IMPERSONATION_PROVIDER_CLASS,
AllowUserImpersonationProvider.class, ImpersonationProvider.class);
miniCluster = new MiniDFSCluster.Builder(conf).build();
miniCluster.waitClusterUp();
dfs = miniCluster.getFileSystem();
dfs.mkdirs(new Path("/tmp"),
new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
dfs.setOwner(new Path("/tmp"), "hdfs", "hdfs");
}
@After
public void tearDown() throws Exception {
if (miniCluster != null) {
miniCluster.shutdown();
miniCluster = null;
}
}
@Test
public void testAuditWorkloadDirectParser() throws Exception {
String workloadInputPath = TestWorkloadGenerator.class.getClassLoader()
.getResource("audit_trace_direct").toString();
conf.set(AuditReplayMapper.INPUT_PATH_KEY, workloadInputPath);
conf.setLong(AuditLogDirectParser.AUDIT_START_TIMESTAMP_KEY, 60 * 1000);
testAuditWorkload();
}
@Test
public void testAuditWorkloadHiveParser() throws Exception {
String workloadInputPath = TestWorkloadGenerator.class.getClassLoader()
.getResource("audit_trace_hive").toString();
conf.set(AuditReplayMapper.INPUT_PATH_KEY, workloadInputPath);
conf.setClass(AuditReplayMapper.COMMAND_PARSER_KEY,
AuditLogHiveTableParser.class, AuditCommandParser.class);
testAuditWorkload();
}
/**
* {@link ImpersonationProvider} that confirms the user doing the
* impersonating is the same as the user running the MiniCluster.
*/
private static class AllowUserImpersonationProvider extends Configured
implements ImpersonationProvider {
public void init(String configurationPrefix) {
// Do nothing
}
public void authorize(UserGroupInformation user, String remoteAddress)
throws AuthorizationException {
try {
if (!user.getRealUser().getShortUserName()
.equals(UserGroupInformation.getCurrentUser().getShortUserName())) {
throw new AuthorizationException();
}
} catch (IOException ioe) {
throw new AuthorizationException(ioe);
}
}
}
private void testAuditWorkload() throws Exception {
long workloadStartTime = System.currentTimeMillis() + 10000;
Job workloadJob = WorkloadDriver.getJobForSubmission(conf,
dfs.getUri().toString(), workloadStartTime, AuditReplayMapper.class);
boolean success = workloadJob.waitForCompletion(true);
assertTrue("workload job should succeed", success);
Counters counters = workloadJob.getCounters();
assertEquals(6,
counters.findCounter(AuditReplayMapper.REPLAYCOUNTERS.TOTALCOMMANDS)
.getValue());
assertEquals(1,
counters
.findCounter(AuditReplayMapper.REPLAYCOUNTERS.TOTALINVALIDCOMMANDS)
.getValue());
assertTrue(dfs.getFileStatus(new Path("/tmp/test1")).isFile());
assertTrue(
dfs.getFileStatus(new Path("/tmp/testDirRenamed")).isDirectory());
assertFalse(dfs.exists(new Path("/denied")));
}
}

View File

@ -0,0 +1,143 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.dynamometer.workloadgenerator.audit;
import java.util.function.Function;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
/** Tests for {@link AuditLogDirectParser}. */
public class TestAuditLogDirectParser {
private static final long START_TIMESTAMP = 10000;
private AuditLogDirectParser parser;
@Before
public void setup() throws Exception {
parser = new AuditLogDirectParser();
Configuration conf = new Configuration();
conf.setLong(AuditLogDirectParser.AUDIT_START_TIMESTAMP_KEY,
START_TIMESTAMP);
parser.initialize(conf);
}
private Text getAuditString(String timestamp, String ugi, String cmd,
String src, String dst) {
return new Text(
String.format("%s INFO FSNamesystem.audit: "
+ "allowed=true\tugi=%s\tip=0.0.0.0\tcmd=%s\tsrc=%s\t"
+ "dst=%s\tperm=null\tproto=rpc",
timestamp, ugi, cmd, src, dst));
}
@Test
public void testSimpleInput() throws Exception {
Text in = getAuditString("1970-01-01 00:00:11,000", "fakeUser",
"listStatus", "sourcePath", "null");
AuditReplayCommand expected = new AuditReplayCommand(1000, "fakeUser",
"listStatus", "sourcePath", "null", "0.0.0.0");
assertEquals(expected, parser.parse(in, Function.identity()));
}
@Test
public void testInputWithRenameOptions() throws Exception {
Text in = getAuditString("1970-01-01 00:00:11,000", "fakeUser",
"rename (options=[TO_TRASH])", "sourcePath", "destPath");
AuditReplayCommand expected = new AuditReplayCommand(1000, "fakeUser",
"rename (options=[TO_TRASH])", "sourcePath", "destPath", "0.0.0.0");
assertEquals(expected, parser.parse(in, Function.identity()));
}
@Test
public void testInputWithTokenAuth() throws Exception {
Text in = getAuditString("1970-01-01 00:00:11,000", "fakeUser (auth:TOKEN)",
"create", "sourcePath", "null");
AuditReplayCommand expected = new AuditReplayCommand(1000, "fakeUser",
"create", "sourcePath", "null", "0.0.0.0");
assertEquals(expected, parser.parse(in, Function.identity()));
}
@Test
public void testInputWithProxyUser() throws Exception {
Text in = getAuditString("1970-01-01 00:00:11,000",
"proxyUser (auth:TOKEN) via fakeUser", "create", "sourcePath", "null");
AuditReplayCommand expected = new AuditReplayCommand(1000, "proxyUser",
"create", "sourcePath", "null", "0.0.0.0");
assertEquals(expected, parser.parse(in, Function.identity()));
}
@Test
public void testParseDefaultDateFormat() throws Exception {
Text in = getAuditString("1970-01-01 13:00:00,000",
"ignored", "ignored", "ignored", "ignored");
AuditReplayCommand expected = new AuditReplayCommand(
13 * 60 * 60 * 1000 - START_TIMESTAMP,
"ignored", "ignored", "ignored", "ignored", "0.0.0.0");
assertEquals(expected, parser.parse(in, Function.identity()));
}
@Test
public void testParseCustomDateFormat() throws Exception {
parser = new AuditLogDirectParser();
Configuration conf = new Configuration();
conf.setLong(AuditLogDirectParser.AUDIT_START_TIMESTAMP_KEY, 0);
conf.set(AuditLogDirectParser.AUDIT_LOG_DATE_FORMAT_KEY,
"yyyy-MM-dd hh:mm:ss,SSS a");
parser.initialize(conf);
Text in = getAuditString("1970-01-01 01:00:00,000 PM",
"ignored", "ignored", "ignored", "ignored");
AuditReplayCommand expected = new AuditReplayCommand(13 * 60 * 60 * 1000,
"ignored", "ignored", "ignored", "ignored", "0.0.0.0");
assertEquals(expected, parser.parse(in, Function.identity()));
}
@Test
public void testParseCustomTimeZone() throws Exception {
parser = new AuditLogDirectParser();
Configuration conf = new Configuration();
conf.setLong(AuditLogDirectParser.AUDIT_START_TIMESTAMP_KEY, 0);
conf.set(AuditLogDirectParser.AUDIT_LOG_DATE_TIME_ZONE_KEY, "Etc/GMT-1");
parser.initialize(conf);
Text in = getAuditString("1970-01-01 01:00:00,000",
"ignored", "ignored", "ignored", "ignored");
AuditReplayCommand expected = new AuditReplayCommand(0,
"ignored", "ignored", "ignored", "ignored", "0.0.0.0");
assertEquals(expected, parser.parse(in, Function.identity()));
}
@Test
public void testParseCustomAuditLineFormat() throws Exception {
Text auditLine = new Text("CUSTOM FORMAT (1970-01-01 00:00:00,000) "
+ "allowed=true\tugi=fakeUser\tip=0.0.0.0\tcmd=fakeCommand\tsrc=src\t"
+ "dst=null\tperm=null\tproto=rpc");
parser = new AuditLogDirectParser();
Configuration conf = new Configuration();
conf.setLong(AuditLogDirectParser.AUDIT_START_TIMESTAMP_KEY, 0);
conf.set(AuditLogDirectParser.AUDIT_LOG_PARSE_REGEX_KEY,
"CUSTOM FORMAT \\((?<timestamp>.+?)\\) (?<message>.+)");
parser.initialize(conf);
AuditReplayCommand expected = new AuditReplayCommand(0,
"fakeUser", "fakeCommand", "src", "null", "0.0.0.0");
assertEquals(expected, parser.parse(auditLine, Function.identity()));
}
}

View File

@ -0,0 +1,6 @@
1970-01-01 00:00:01,010 INFO FSNamesystem.audit: allowed=true ugi=hdfs ip=/0.0.0.0 cmd=create src=/tmp/test1 dst=null perm=null proto=rpc
1970-01-01 00:00:01,020 INFO FSNamesystem.audit: allowed=true ugi=hdfs ip=/0.0.0.0 cmd=mkdirs src=/tmp/testDir dst=null perm=null proto=rpc
1970-01-01 00:00:01,030 INFO FSNamesystem.audit: allowed=true ugi=hdfs ip=/0.0.0.0 cmd=rename src=/tmp/testDir dst=/tmp/testDirRenamed perm=null proto=rpc
1970-01-01 00:00:01,040 INFO FSNamesystem.audit: allowed=true ugi=hdfs@REALM.COM ip=/0.0.0.0 cmd=mkdirs src=/tmp/testDir2 dst=null perm=null proto=rpc
1970-01-01 00:00:01,050 INFO FSNamesystem.audit: allowed=true ugi=hdfs/127.0.0.1@REALM.COM ip=/0.0.0.0 cmd=mkdirs src=/tmp/testDir3 dst=null perm=null proto=rpc
1970-01-01 00:00:01,060 INFO FSNamesystem.audit: allowed=true ugi=otherUser ip=/0.0.0.0 cmd=mkdirs src=/denied dst=null perm=null proto=rpc

View File

@ -0,0 +1,6 @@
+10hdfscreate/tmp/test1 0.0.0.0
+20hdfsmkdirs/tmp/testDir 0.0.0.0
+30hdfsrename/tmp/testDir/tmp/testDirRenamed0.0.0.0
+40hdfs@REALM.COMmkdirs/tmp/testDir2 0.0.0.0
+50hdfs/127.0.0.1@REALM.COMmkdirs/tmp/testDir3 0.0.0.0
+60otherUsermkdirs/denied 0.0.0.0

View File

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-project</artifactId>
<version>3.3.0-SNAPSHOT</version>
<relativePath>../../hadoop-project</relativePath>
</parent>
<artifactId>hadoop-dynamometer</artifactId>
<version>3.3.0-SNAPSHOT</version>
<description>Apache Hadoop Dynamometer</description>
<name>Apache Hadoop Dynamometer</name>
<packaging>pom</packaging>
<modules>
<module>hadoop-dynamometer-infra</module>
<module>hadoop-dynamometer-workload</module>
<module>hadoop-dynamometer-blockgen</module>
<module>hadoop-dynamometer-dist</module>
</modules>
<build>
<plugins>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<configuration>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -32,6 +32,7 @@
<modules>
<module>hadoop-streaming</module>
<module>hadoop-distcp</module>
<module>hadoop-dynamometer</module>
<module>hadoop-archives</module>
<module>hadoop-archive-logs</module>
<module>hadoop-rumen</module>