diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml index 4868590ac4..57f3c66dad 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml @@ -22,7 +22,6 @@ dir false - hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/target/native/target/usr/local/bin @@ -33,7 +32,7 @@ hadoop-yarn/bin bin - * + yarn 0755 @@ -41,17 +40,81 @@ bin bin - * + mapred + + 0755 + + + bin + libexec + + mapred-config.sh + + 0755 + + + hadoop-yarn/bin + libexec + + yarn-config.sh + + 0755 + + + hadoop-yarn/bin + sbin + + yarn-daemon.sh + yarn-daemons.sh + start-yarn.sh + stop-yarn.sh 0755 hadoop-yarn/conf - conf + etc/hadoop **/* + + ${basedir} + /share/doc/hadoop/${hadoop.component} + + *.txt + + + + ${project.build.directory}/webapps + /share/hadoop/${hadoop.component}/webapps + + + ${basedir}/src/main/conf + /share/hadoop/${hadoop.component}/templates + + *-site.xml + + + + ${basedir}/src/main/packages/templates/conf + /share/hadoop/${hadoop.component}/templates/conf + + * + + + + ${basedir}/dev-support/jdiff + /share/hadoop/${hadoop.component}/jdiff + + + ${project.build.directory}/site/jdiff/xml + /share/hadoop/${hadoop.component}/jdiff + + + ${project.build.directory}/site + /share/doc/hadoop/${hadoop.component} + @@ -59,7 +122,7 @@ org.apache.hadoop:hadoop-yarn-server-tests - modules + share/hadoop/${hadoop.component} false false @@ -68,7 +131,7 @@ false - /lib + /share/hadoop/${hadoop.component}/lib org.apache.hadoop:hadoop-common diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 21eda1b32f..aba8ce050c 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -141,6 +141,9 @@ Trunk (unreleased changes) HADOOP-7892. IPC logs too verbose after "RpcKind" introduction (todd) + HADOOP-7931. o.a.h.ipc.WritableRpcEngine should have a way to force + initialization (atm) + OPTIMIZATIONS HADOOP-7761. Improve the performance of raw comparisons. 
(todd) diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh index e53ec737f5..71c9481714 100644 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh @@ -231,6 +231,23 @@ fi CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs'/*' +# put yarn in classpath if present +if [ "$YARN_HOME" = "" ]; then + if [ -d "${HADOOP_PREFIX}/share/hadoop/mapreduce" ]; then + YARN_HOME=$HADOOP_PREFIX + fi +fi + +if [ -d "$YARN_HOME/share/hadoop/mapreduce/webapps" ]; then + CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce +fi + +if [ -d "$YARN_HOME/share/hadoop/mapreduce/lib" ]; then + CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce/lib'/*' +fi + +CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce'/*' + # cygwin path translation if $cygwin; then HADOOP_HDFS_HOME=`cygpath -w "$HADOOP_HDFS_HOME"` diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java index 25f46f13e2..19a496809b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java @@ -48,17 +48,38 @@ public class WritableRpcEngine implements RpcEngine { private static final Log LOG = LogFactory.getLog(RPC.class); - static { // Register the rpcRequest deserializer for WritableRpcEngine - org.apache.hadoop.ipc.Server.registerProtocolEngine(RpcKind.RPC_WRITABLE, - Invocation.class, new Server.WritableRpcInvoker()); - } - - //writableRpcVersion should be updated if there is a change //in format of the rpc messages. // 2L - added declared class to Invocation - public static final long writableRpcVersion = 2L; + public static final long writableRpcVersion = 2L; + + /** + * Whether or not this class has been initialized. + */ + private static boolean isInitialized = false; + + static { + ensureInitialized(); + } + + /** + * Initialize this class if it isn't already. + */ + public static synchronized void ensureInitialized() { + if (!isInitialized) { + initialize(); + } + } + + /** + * Register the rpcRequest deserializer for WritableRpcEngine + */ + private static synchronized void initialize() { + org.apache.hadoop.ipc.Server.registerProtocolEngine(RpcKind.RPC_WRITABLE, + Invocation.class, new Server.WritableRpcInvoker()); + isInitialized = true; + } /** A method invocation, including the method name and its parameters.*/ diff --git a/hadoop-dist/pom.xml b/hadoop-dist/pom.xml index ed6b729a93..93fe32be25 100644 --- a/hadoop-dist/pom.xml +++ b/hadoop-dist/pom.xml @@ -76,6 +76,9 @@ dist false + + tar|rpm|deb + @@ -114,15 +117,6 @@ run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${project.version}/* . run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs-httpfs/target/hadoop-hdfs-httpfs-${project.version}/* . run cp -r $ROOT/hadoop-mapreduce-project/target/hadoop-mapreduce-${project.version}/* . 
- COMMON_LIB=share/hadoop/common/lib - MODULES=../../../../modules - run ln -s $MODULES/hadoop-mapreduce-client-app-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-yarn-api-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-mapreduce-client-common-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-yarn-common-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-mapreduce-client-core-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-yarn-server-common-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-mapreduce-client-jobclient-${project.version}.jar $COMMON_LIB echo echo "Hadoop dist layout available at: ${project.build.directory}/hadoop-${project.version}" echo diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 86d1b32efe..406a3a2d8b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -170,6 +170,8 @@ Trunk (unreleased changes) HDFS-1765. Block Replication should respect under-replication block priority. (Uma Maheswara Rao G via eli) + HDFS-2694. Removal of Avro broke non-PB NN services. (atm) + Release 0.23.1 - UNRELEASED INCOMPATIBLE CHANGES @@ -265,6 +267,8 @@ Release 0.23.1 - UNRELEASED HDFS-2649. eclipse:eclipse build fails for hadoop-hdfs-httpfs. (Jason Lowe via eli) + HDFS-2640. Javadoc generation hangs. (tomwhite) + Release 0.23.0 - 2011-11-01 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index 510f9d0140..a6fbee13c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -324,6 +324,13 @@ + + org.apache.maven.plugins + maven-javadoc-plugin + + org.apache.hadoop.hdfs.protocol.proto + + org.apache.rat apache-rat-plugin diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 874cb5febf..d4c5ef0850 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -100,6 +100,7 @@ import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.ipc.WritableRpcEngine; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; import org.apache.hadoop.net.Node; import org.apache.hadoop.security.AccessControlException; @@ -167,6 +168,8 @@ public NameNodeRpcServer(Configuration conf, NameNode nn) BlockingService NNPbService = NamenodeProtocolService .newReflectiveBlockingService(namenodeProtocolXlator); + WritableRpcEngine.ensureInitialized(); + InetSocketAddress dnSocketAddr = nn.getServiceRpcServerAddress(conf); if (dnSocketAddr != null) { int serviceHandlerCount = diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 041e83ff66..010b34aa65 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -6,6 +6,7 @@ Trunk (unreleased changes) MAPREDUCE-3545. Remove Avro RPC. (suresh) NEW FEATURES + MAPREDUCE-778. Rumen Anonymizer. (Amar Kamat and Chris Douglas via amarrk) MAPREDUCE-2669. Add new examples for Mean, Median, and Standard Deviation. 
(Plamen Jeliazkov via shv) @@ -314,6 +315,22 @@ Release 0.23.1 - Unreleased MAPREDUCE-3560. TestRMNodeTransitions is failing on trunk. (Siddharth Seth via mahadev) + MAPREDUCE-3487. Fixed JobHistory web-UI to display links to single task's + counters' page. (Jason Lowe via vinodkv) + + MAPREDUCE-3564. Fixed failures in TestStagingCleanup and TestJobEndNotifier + tests. (Siddharth Seth via vinodkv) + + MAPREDUCE-3422. Counter display names are not being picked up. (Jonathan + Eagles via sseth) + + MAPREDUCE-3366. Mapreduce component should use consistent directory structure + layout as HDFS/common (Eric Yang via mahadev) + + MAPREDUCE-3387. Fixed AM's tracking URL to always go through the proxy, even + before the job started, so that it works properly with oozie throughout + the job execution. (Robert Joseph Evans via vinodkv) + Release 0.23.0 - 2011-11-01 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/INSTALL b/hadoop-mapreduce-project/INSTALL index e6de8cb92e..e75b2aff2f 100644 --- a/hadoop-mapreduce-project/INSTALL +++ b/hadoop-mapreduce-project/INSTALL @@ -55,11 +55,11 @@ Step 8) Modify mapred-site.xml to use yarn framework Step 9) cd $YARN_HOME -Step 10) bin/yarn-daemon.sh start resourcemanager +Step 10) sbin/yarn-daemon.sh start resourcemanager -Step 11) bin/yarn-daemon.sh start nodemanager +Step 11) sbin/yarn-daemon.sh start nodemanager -Step 12) bin/yarn-daemon.sh start historyserver +Step 12) sbin/yarn-daemon.sh start historyserver Step 13) You are all set, an example on how to run a mapreduce job is: cd $HADOOP_MAPRED_HOME diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index 33c1fd3cc0..5c2e0fd0c8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -375,15 +375,17 @@ public void handle(JobFinishEvent event) { // this is the only job, so shut down the Appmaster // note in a workflow scenario, this may lead to creation of a new // job (FIXME?) 
- try { - LOG.info("Job end notification started for jobID : " - + job.getReport().getJobId()); - JobEndNotifier notifier = new JobEndNotifier(); - notifier.setConf(getConfig()); - notifier.notify(job.getReport()); - } catch (InterruptedException ie) { - LOG.warn("Job end notification interrupted for jobID : " - + job.getReport().getJobId(), ie ); + if (getConfig().get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL) != null) { + try { + LOG.info("Job end notification started for jobID : " + + job.getReport().getJobId()); + JobEndNotifier notifier = new JobEndNotifier(); + notifier.setConf(getConfig()); + notifier.notify(job.getReport()); + } catch (InterruptedException ie) { + LOG.warn("Job end notification interrupted for jobID : " + + job.getReport().getJobId(), ie); + } } // TODO:currently just wait for some time so clients can know the diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java index cf6ab99a93..6accd8add7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java @@ -120,7 +120,7 @@ public class CountersBlock extends HtmlBlock { // Ditto TR>>>>>>> groupRow = group. tr(); - if (mg == null && rg == null) { + if (task == null && mg == null && rg == null) { groupRow.td().$title(counter.getName())._(counter.getDisplayName()). _(); } else { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java index 1ec774e3fb..bb72822542 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java @@ -24,6 +24,7 @@ import org.apache.hadoop.mapreduce.v2.api.records.Counter; import org.apache.hadoop.mapreduce.v2.api.records.CounterGroup; +import org.apache.hadoop.mapreduce.v2.api.records.Counters; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; @@ -120,8 +121,9 @@ private void populateMembers(AppContext ctx) { for(Map.Entry entry : task.getAttempts().entrySet()) { long value = 0; - CounterGroup group = entry.getValue().getCounters() - .getCounterGroup($(COUNTER_GROUP)); + Counters counters = entry.getValue().getCounters(); + CounterGroup group = (counters != null) + ? 
counters.getCounterGroup($(COUNTER_GROUP)) : null; if(group != null) { Counter c = group.getCounter($(COUNTER_NAME)); if(c != null) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java index 3cf6ea9c70..52ca1cf3f8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java @@ -96,8 +96,8 @@ public void testNotifyRetries() throws InterruptedException { conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL, "http://nonexistent"); conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3"); conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3"); - conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3"); - conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3"); + conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000"); + conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000"); JobReport jobReport = Mockito.mock(JobReport.class); long startTime = System.currentTimeMillis(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java index 5146acb599..e0dbac97b6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java @@ -73,6 +73,7 @@ public void testDeletionofStaging() throws IOException { JobId jobid = recordFactory.newRecordInstance(JobId.class); jobid.setAppId(appId); MRAppMaster appMaster = new TestMRApp(attemptId); + appMaster.init(conf); EventHandler handler = appMaster.createJobFinishEventHandler(); handler.handle(new JobFinishEvent(jobid)); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java index 745eedcb86..691ff657cd 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java @@ -31,6 +31,7 @@ import org.apache.hadoop.mapreduce.v2.app.MockJobs; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.Task; +import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -153,7 +154,7 @@ public static Map getTaskParams(AppContext 
appContext) { e.getValue().getType(); Map params = new HashMap(); params.put(AMParams.JOB_ID, MRApps.toString(jobId)); - params.put(AMParams.TASK_ID, e.getKey().toString()); + params.put(AMParams.TASK_ID, MRApps.toString(e.getKey())); params.put(AMParams.TASK_TYPE, MRApps.taskSymbol(e.getValue().getType())); return params; } @@ -179,6 +180,32 @@ public static Map getTaskParams(AppContext appContext) { WebAppTests.testPage(SingleCounterPage.class, AppContext.class, appContext, params); } + + @Test public void testTaskCountersView() { + AppContext appContext = new TestAppContext(); + Map params = getTaskParams(appContext); + WebAppTests.testPage(CountersPage.class, AppContext.class, + appContext, params); + } + + @Test public void testSingleTaskCounterView() { + AppContext appContext = new TestAppContext(0, 1, 1, 2); + Map params = getTaskParams(appContext); + params.put(AMParams.COUNTER_GROUP, + "org.apache.hadoop.mapreduce.FileSystemCounter"); + params.put(AMParams.COUNTER_NAME, "HDFS_WRITE_OPS"); + + // remove counters from one task attempt + // to test handling of missing counters + TaskId taskID = MRApps.toTaskID(params.get(AMParams.TASK_ID)); + Job job = appContext.getJob(taskID.getJobId()); + Task task = job.getTask(taskID); + TaskAttempt attempt = task.getAttempts().values().iterator().next(); + attempt.getReport().setCounters(null); + + WebAppTests.testPage(SingleCounterPage.class, AppContext.class, + appContext, params); + } public static void main(String[] args) { WebApps.$for("yarn", AppContext.class, new TestAppContext(0, 8, 88, 4)). diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/FileSystemCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/FileSystemCounter.properties similarity index 100% rename from hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/FileSystemCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/FileSystemCounter.properties diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/JobCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/JobCounter.properties similarity index 100% rename from hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/JobCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/JobCounter.properties diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/TaskCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/TaskCounter.properties similarity index 100% rename from hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/TaskCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/TaskCounter.properties diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormatCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/lib/input/FileInputFormatCounter.properties similarity index 100% rename from 
hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormatCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/lib/input/FileInputFormatCounter.properties diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormatCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/lib/output/FileOutputFormatCounter.properties similarity index 100% rename from hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormatCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/lib/output/FileOutputFormatCounter.properties diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/slaves.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/slaves.sh index ee83477901..ee254603d6 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/slaves.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/slaves.sh @@ -38,7 +38,7 @@ fi bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/start-all.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/start-yarn.sh similarity index 97% rename from hadoop-mapreduce-project/hadoop-yarn/bin/start-all.sh rename to hadoop-mapreduce-project/hadoop-yarn/bin/start-yarn.sh index fa4fcf3d0d..ccd63a4478 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/start-all.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/start-yarn.sh @@ -23,7 +23,7 @@ echo "starting yarn daemons" bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/stop-all.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/stop-yarn.sh similarity index 97% rename from hadoop-mapreduce-project/hadoop-yarn/bin/stop-all.sh rename to hadoop-mapreduce-project/hadoop-yarn/bin/stop-yarn.sh index 546b67f5c9..c10d1ce7d1 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/stop-all.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/stop-yarn.sh @@ -23,7 +23,7 @@ echo "stopping yarn daemons" bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn index b8e23a97f5..f5c8c1f8e8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn @@ -44,7 +44,7 @@ bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/yarn-config.sh @@ -109,8 +109,7 @@ if [ ! 
-d "$HADOOP_CONF_DIR" ]; then exit 1 fi -CLASSPATH="${HADOOP_CONF_DIR}:${YARN_CONF_DIR}" -CLASSPATH=${CLASSPATH}:${YARN_CLASSPATH} +CLASSPATH="${HADOOP_CONF_DIR}:${YARN_CONF_DIR}:${CLASSPATH}" CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar # for developers, add Hadoop classes to CLASSPATH @@ -146,38 +145,6 @@ fi # so that filenames w/ spaces are handled correctly in loops below IFS= -# add hadoop-common libs to CLASSPATH -if [ ! -d "$HADOOP_COMMON_HOME" ]; then - if [ -d "$HADOOP_PREFIX" ]; then - export HADOOP_COMMON_HOME=$HADOOP_PREFIX - else - echo No HADOOP_COMMON_HOME set. - echo Please specify it either in yarn-env.sh or in the environment. - exit 1 - fi -fi - -CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/share/hadoop/common'/*' -CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/share/hadoop/common/lib'/*' - -# add hadoop-hdfs libs to CLASSPATH -if [ ! -d "$HADOOP_HDFS_HOME" ]; then - if [ -d "$HADOOP_PREFIX" ]; then - export HADOOP_HDFS_HOME=$HADOOP_PREFIX - else - echo No HADOOP_HDFS_HOME set. - echo Please specify it either in yarn-env.sh or in the environment. - exit 1 - fi -fi -CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs'/*' -CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib'/*' - -# add yarn libs to CLASSPATH - -CLASSPATH=${CLASSPATH}:$YARN_HOME/modules'/*' -CLASSPATH=${CLASSPATH}:$YARN_HOME/lib'/*' - # default log directory & file if [ "$YARN_LOG_DIR" = "" ]; then YARN_LOG_DIR="$YARN_HOME/logs" diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh index 4371484b86..2757044273 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh @@ -15,29 +15,24 @@ # included in all the hadoop scripts with source command # should not be executable directly -# also should not be passed any arguments, since we need original $* - -# resolve links - $0 may be a softlink - -this="$0" -while [ -h "$this" ]; do - ls=`ls -ld "$this"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '.*/.*' > /dev/null; then - this="$link" - else - this=`dirname "$this"`/"$link" - fi -done - -# convert relative path to absolute path -bin=`dirname "$this"` -script=`basename "$this"` +bin=`which "$0"` +bin=`dirname "${bin}"` bin=`cd "$bin"; pwd` -this="$bin/$script" -# the root of the Hadoop installation -export YARN_HOME=`dirname "$this"`/.. +export HADOOP_PREFIX="${HADOOP_PREFIX:-$bin/..}" + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then + . ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh +elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then + . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh +elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then + . "$HADOOP_HOME"/libexec/hadoop-config.sh +else + echo "Hadoop common not found." + exit +fi # Same glibc bug that discovered in Hadoop. # Without this you can see very large vmem settings on containers. @@ -56,7 +51,7 @@ then fi # Allow alternate conf dir location. 
-YARN_CONF_DIR="${YARN_CONF_DIR:-$YARN_HOME/conf}" +YARN_CONF_DIR="${HADOOP_CONF_DIR:-$YARN_HOME/conf}" #check to see it is specified whether to use the slaves or the # masters file diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh index 99fcb0a550..6e41f791c3 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh @@ -39,7 +39,7 @@ fi bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemons.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemons.sh index e34e4ca8b1..aafb42b9b1 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemons.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemons.sh @@ -30,7 +30,7 @@ fi bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/conf/yarn-env.sh b/hadoop-mapreduce-project/hadoop-yarn/conf/yarn-env.sh index b219eddf1a..cfcb250b8e 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/conf/yarn-env.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/conf/yarn-env.sh @@ -41,10 +41,6 @@ if [ "$YARN_HEAPSIZE" != "" ]; then #echo $JAVA_HEAP_MAX fi -# CLASSPATH initially contains $YARN_CONF_DIR -CLASSPATH="${YARN_CONF_DIR}" -CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar - # so that filenames w/ spaces are handled correctly in loops below IFS= diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java index c4d0d78ea5..9439e21cfa 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java @@ -95,8 +95,8 @@ public interface ApplicationConstants { "$HADOOP_COMMON_HOME/share/hadoop/common/lib/*", "$HADOOP_HDFS_HOME/share/hadoop/hdfs/*", "$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*", - "$YARN_HOME/modules/*", - "$YARN_HOME/lib/*" + "$YARN_HOME/share/hadoop/mapreduce/*", + "$YARN_HOME/share/hadoop/mapreduce/lib/*" }; /** diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 71dd982b60..0f695fda9f 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -273,6 +273,8 @@ public RMAppAttemptImpl(ApplicationAttemptId 
appAttemptId, this.readLock = lock.readLock(); this.writeLock = lock.writeLock(); + this.proxiedTrackingUrl = generateProxyUriWithoutScheme(); + this.stateMachine = stateMachineFactory.make(this); } @@ -358,11 +360,16 @@ public String getWebProxyBase() { } } + private String generateProxyUriWithoutScheme() { + return generateProxyUriWithoutScheme(null); + } + private String generateProxyUriWithoutScheme( final String trackingUriWithoutScheme) { this.readLock.lock(); try { - URI trackingUri = ProxyUriUtils.getUriFromAMUrl(trackingUriWithoutScheme); + URI trackingUri = trackingUriWithoutScheme == null ? null : + ProxyUriUtils.getUriFromAMUrl(trackingUriWithoutScheme); URI proxyUri = ProxyUriUtils.getUriFromAMUrl(proxy); URI result = ProxyUriUtils.getProxyUri(trackingUri, proxyUri, applicationAttemptId.getApplicationId()); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java index b2600ae0ea..f7f54aae3f 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java @@ -80,8 +80,10 @@ public AppInfo(RMApp app, Boolean hasAccess) { if (app != null) { String trackingUrl = app.getTrackingUrl(); + this.state = app.getState(); this.trackingUrlIsNotReady = trackingUrl == null || trackingUrl.isEmpty() - || "N/A".equalsIgnoreCase(trackingUrl); + || RMAppState.NEW == this.state || RMAppState.SUBMITTED == this.state + || RMAppState.ACCEPTED == this.state; this.trackingUI = this.trackingUrlIsNotReady ? "UNASSIGNED" : (app .getFinishTime() == 0 ? 
"ApplicationMaster" : "History"); if (!trackingUrlIsNotReady) { @@ -95,7 +97,6 @@ public AppInfo(RMApp app, Boolean hasAccess) { this.user = app.getUser().toString(); this.name = app.getName().toString(); this.queue = app.getQueue().toString(); - this.state = app.getState(); this.progress = app.getProgress() * 100; this.diagnostics = app.getDiagnostics().toString(); if (diagnostics == null || diagnostics.isEmpty()) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index 09699fbf91..1059e58ab9 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -17,8 +17,7 @@ */ package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; +import static org.junit.Assert.*; import static org.mockito.Matchers.*; import static org.mockito.Mockito.*; @@ -203,6 +202,8 @@ private void testAppAttemptNewState() { assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); assertEquals(0, applicationAttempt.getRanNodes().size()); assertNull(applicationAttempt.getFinalApplicationStatus()); + assertNotNull(applicationAttempt.getTrackingUrl()); + assertFalse("N/A".equals(applicationAttempt.getTrackingUrl())); } /** diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/ProxyUriUtils.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/ProxyUriUtils.java index e9bc0c81f8..61e31eee93 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/ProxyUriUtils.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/ProxyUriUtils.java @@ -114,7 +114,8 @@ private static boolean appendQuery(StringBuilder builder, String query, /** * Get a proxied URI for the original URI. - * @param originalUri the original URI to go through the proxy + * @param originalUri the original URI to go through the proxy, or null if + * a default path "/" can be used. * @param proxyUri the URI of the proxy itself, scheme, host and port are used. * @param id the id of the application * @return the proxied URI @@ -122,9 +123,10 @@ private static boolean appendQuery(StringBuilder builder, String query, public static URI getProxyUri(URI originalUri, URI proxyUri, ApplicationId id) { try { - String path = getPath(id, originalUri.getPath()); + String path = getPath(id, originalUri == null ? "/" : originalUri.getPath()); return new URI(proxyUri.getScheme(), proxyUri.getAuthority(), path, - originalUri.getQuery(), originalUri.getFragment()); + originalUri == null ? 
null : originalUri.getQuery(), + originalUri == null ? null : originalUri.getFragment()); } catch (URISyntaxException e) { throw new RuntimeException("Could not proxify "+originalUri,e); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestProxyUriUtils.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestProxyUriUtils.java index 2f83b6e38c..16ee7beded 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestProxyUriUtils.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestProxyUriUtils.java @@ -23,44 +23,16 @@ import java.net.URI; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.Test; public class TestProxyUriUtils { - public static class TestAppId extends ApplicationId { - private long timestamp; - private int id; - - public TestAppId(int id, long timestamp) { - setId(id); - setClusterTimestamp(timestamp); - } - @Override - public int getId() { - return id; - } - - @Override - public void setId(int id) { - this.id = id; - } - - @Override - public long getClusterTimestamp() { - return timestamp; - } - - @Override - public void setClusterTimestamp(long clusterTimestamp) { - this.timestamp = clusterTimestamp; - } - } - @Test public void testGetPathApplicationId() { assertEquals("/proxy/application_100_0001", - ProxyUriUtils.getPath(new TestAppId(1, 100l))); + ProxyUriUtils.getPath(BuilderUtils.newApplicationId(100l, 1))); assertEquals("/proxy/application_6384623_0005", - ProxyUriUtils.getPath(new TestAppId(5, 6384623l))); + ProxyUriUtils.getPath(BuilderUtils.newApplicationId(6384623l, 5))); } @Test(expected = IllegalArgumentException.class) @@ -71,23 +43,23 @@ public void testGetPathApplicationIdBad() { @Test public void testGetPathApplicationIdString() { assertEquals("/proxy/application_6384623_0005", - ProxyUriUtils.getPath(new TestAppId(5, 6384623l), null)); + ProxyUriUtils.getPath(BuilderUtils.newApplicationId(6384623l, 5), null)); assertEquals("/proxy/application_6384623_0005/static/app", - ProxyUriUtils.getPath(new TestAppId(5, 6384623l), "/static/app")); + ProxyUriUtils.getPath(BuilderUtils.newApplicationId(6384623l, 5), "/static/app")); assertEquals("/proxy/application_6384623_0005/", - ProxyUriUtils.getPath(new TestAppId(5, 6384623l), "/")); + ProxyUriUtils.getPath(BuilderUtils.newApplicationId(6384623l, 5), "/")); assertEquals("/proxy/application_6384623_0005/some/path", - ProxyUriUtils.getPath(new TestAppId(5, 6384623l), "some/path")); + ProxyUriUtils.getPath(BuilderUtils.newApplicationId(6384623l, 5), "some/path")); } @Test public void testGetPathAndQuery() { assertEquals("/proxy/application_6384623_0005/static/app?foo=bar", - ProxyUriUtils.getPathAndQuery(new TestAppId(5, 6384623l), "/static/app", + ProxyUriUtils.getPathAndQuery(BuilderUtils.newApplicationId(6384623l, 5), "/static/app", "?foo=bar", false)); assertEquals("/proxy/application_6384623_0005/static/app?foo=bar&bad=good&proxyapproved=true", - ProxyUriUtils.getPathAndQuery(new TestAppId(5, 6384623l), "/static/app", + ProxyUriUtils.getPathAndQuery(BuilderUtils.newApplicationId(6384623l, 5), "/static/app", "foo=bar&bad=good", true)); } @@ -95,10 +67,20 @@ 
public void testGetPathAndQuery() { public void testGetProxyUri() throws Exception { URI originalUri = new URI("http://host.com/static/foo?bar=bar"); URI proxyUri = new URI("http://proxy.net:8080/"); - TestAppId id = new TestAppId(5, 6384623l); + ApplicationId id = BuilderUtils.newApplicationId(6384623l, 5); URI expected = new URI("http://proxy.net:8080/proxy/application_6384623_0005/static/foo?bar=bar"); URI result = ProxyUriUtils.getProxyUri(originalUri, proxyUri, id); assertEquals(expected, result); } + + @Test + public void testGetProxyUriNull() throws Exception { + URI originalUri = null; + URI proxyUri = new URI("http://proxy.net:8080/"); + ApplicationId id = BuilderUtils.newApplicationId(6384623l, 5); + URI expected = new URI("http://proxy.net:8080/proxy/application_6384623_0005/"); + URI result = ProxyUriUtils.getProxyUri(originalUri, proxyUri, id); + assertEquals(expected, result); + } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm index 3d34351708..f4ea1fe69c 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm @@ -171,20 +171,6 @@ Add the following configs to your <<>> +---+ -* Create Symlinks. - - You will have to create the following symlinks: - -+---+ -$ cd $HADOOP_COMMON_HOME/share/hadoop/common/lib/ -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-app-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-jobclient-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-common-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-shuffle-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-core-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-common-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-api-*-SNAPSHOT.jar . -+---+ * Running daemons. Assuming that the environment variables <<$HADOOP_COMMON_HOME>>, <<$HADOOP_HDFS_HOME>>, <<$HADOO_MAPRED_HOME>>, @@ -195,8 +181,8 @@ $ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-api-*-SNAPSHOT.jar . +---+ $ cd $HADOOP_MAPRED_HOME -$ bin/yarn-daemon.sh start resourcemanager -$ bin/yarn-daemon.sh start nodemanager +$ sbin/yarn-daemon.sh start resourcemanager +$ sbin/yarn-daemon.sh start nodemanager +---+ You should be up and running. 
You can run randomwriter as: diff --git a/hadoop-mapreduce-project/ivy.xml b/hadoop-mapreduce-project/ivy.xml index e9b38d077e..e04da7019b 100644 --- a/hadoop-mapreduce-project/ivy.xml +++ b/hadoop-mapreduce-project/ivy.xml @@ -139,6 +139,13 @@ + + + + + diff --git a/hadoop-mapreduce-project/ivy/libraries.properties b/hadoop-mapreduce-project/ivy/libraries.properties index 360c5a9967..93a10282fc 100644 --- a/hadoop-mapreduce-project/ivy/libraries.properties +++ b/hadoop-mapreduce-project/ivy/libraries.properties @@ -81,5 +81,6 @@ wagon-http.version=1.0-beta-2 xmlenc.version=0.52 xerces.version=1.4.4 +jackson.version=1.8.2 yarn.version=0.24.0-SNAPSHOT hadoop-mapreduce.version=0.24.0-SNAPSHOT diff --git a/hadoop-mapreduce-project/pom.xml b/hadoop-mapreduce-project/pom.xml index 74970dd5ee..b9e64473cf 100644 --- a/hadoop-mapreduce-project/pom.xml +++ b/hadoop-mapreduce-project/pom.xml @@ -34,6 +34,8 @@ 600000 once ${basedir} + mapreduce + true @@ -321,7 +323,10 @@ - release + dist + + false + @@ -336,16 +341,6 @@ - - - - - dist - - false - - - org.apache.maven.plugins maven-assembly-plugin @@ -367,7 +362,7 @@ - dist + package-mapreduce prepare-package single diff --git a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java index 9b6ed69f57..77ec697872 100644 --- a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java +++ b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java @@ -26,8 +26,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Delayed; import java.util.concurrent.TimeUnit; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.security.PrivilegedExceptionAction; import org.apache.hadoop.conf.Configuration; @@ -49,6 +47,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.tools.rumen.JobStory; +import static org.apache.hadoop.tools.rumen.datatypes.util.MapReduceJobPropertiesParser.extractMaxHeapOpts; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -92,8 +91,6 @@ protected Formatter initialValue() { // configuration key to enable/disable task jvm options static final String GRIDMIX_TASK_JVM_OPTIONS_ENABLE = "gridmix.task.jvm-options.enable"; - private static final Pattern maxHeapPattern = - Pattern.compile("-Xmx[0-9]+[kKmMgGtT]?+"); private static void setJobQueue(Job job, String queue) { if (queue != null) { @@ -225,18 +222,6 @@ private static void configureTaskJVMMaxHeapOptions(Configuration srcConf, } } } - - private static void extractMaxHeapOpts(String javaOptions, - List maxOpts, List others) { - for (String opt : javaOptions.split(" ")) { - Matcher matcher = maxHeapPattern.matcher(opt); - if (matcher.find()) { - maxOpts.add(opt); - } else { - others.add(opt); - } - } - } // Scales the desired job-level configuration parameter. 
This API makes sure // that the ratio of the job level configuration parameter to the cluster diff --git a/hadoop-mapreduce-project/src/docs/src/documentation/content/xdocs/rumen.xml b/hadoop-mapreduce-project/src/docs/src/documentation/content/xdocs/rumen.xml index 75b97ac5e8..dbe72c56ca 100644 --- a/hadoop-mapreduce-project/src/docs/src/documentation/content/xdocs/rumen.xml +++ b/hadoop-mapreduce-project/src/docs/src/documentation/content/xdocs/rumen.xml @@ -73,6 +73,11 @@ computed for the total number of successful tasks for every attempt. +
  • Anonymized traces enable sharing of production traces of large-scale Hadoop deployments. Sharing of traces will foster collaboration within the Hadoop community. It can also be used to supplement interesting research findings.
@@ -102,6 +107,11 @@ Increasing the trace runtime might involve adding some dummy jobs to the resulting trace and scaling up the runtime of individual jobs.
  • Anonymizer: A utility to anonymize Hadoop job and cluster topology traces by masking certain sensitive fields but retaining important workload characteristics.
@@ -128,10 +138,11 @@ output-duration, concentration etc.

-    Rumen provides 2 basic commands
+    Rumen provides 3 basic commands

       • TraceBuilder
       • Folder
+      • Anonymizer
     Firstly, we need to generate the Gold Trace. Hence the first
@@ -139,8 +150,9 @@
     The output of the TraceBuilder is a job-trace file (and an
     optional cluster-topology file). In case we want to scale the
     output, we can use the Folder utility to fold the current trace
-    to the desired length. The remaining part of this section
-    explains these utilities in detail.
+    to the desired length. For anonymizing the trace, use the
+    Anonymizer utility. The remaining part of this section
+    explains these utilities in detail.
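     For orientation, a possible end-to-end sequence of the three
     commands is sketched below. The TraceBuilder and Folder synopses
     follow their respective sections of this guide; all paths and
     durations here are placeholders, not prescribed values.

       java org.apache.hadoop.tools.rumen.TraceBuilder \
           file:///home/user/job-trace.json \
           file:///home/user/cluster-topology.json \
           hdfs:///mapred/history/done
       java org.apache.hadoop.tools.rumen.Folder \
           -output-duration 1h -input-cycle 20m \
           file:///home/user/job-trace.json \
           file:///home/user/job-trace-1hr.json
       java org.apache.hadoop.tools.rumen.Anonymizer \
           -trace file:///home/user/job-trace-1hr.json \
           file:///home/user/job-trace-anonymized.json \
           -topology file:///home/user/cluster-topology.json \
           file:///home/user/cluster-topology-anonymized.json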

     Examples in this section assume that certain libraries are present
@@ -426,8 +438,156 @@

     Anonymizer

     Command:

       java org.apache.hadoop.tools.rumen.Anonymizer [options]
           [-trace <jobtrace-input> <jobtrace-output>]
           [-topology <topology-input> <topology-output>]

     This command invokes the Anonymizer utility of Rumen. It
     anonymizes sensitive information from the <jobtrace-input> file
     and outputs the anonymized content into the <jobtrace-output>
     file. It also anonymizes the cluster layout (topology) from the
     <topology-input> and outputs it in the <topology-output> file.
     <jobtrace-input> represents the job trace file obtained using
     TraceBuilder or Folder. <topology-input> represents the cluster
     topology file obtained using TraceBuilder.

     Options:

       Parameter   Description                   Notes
       -trace      Anonymizes job traces.        Anonymizes sensitive fields like user-name,
                                                 job-name, queue-name, host-names, job
                                                 configuration parameters etc.
       -topology   Anonymizes cluster topology.  Anonymizes rack-names and host-names.
     Anonymizer Configuration Parameters

     The Rumen anonymizer can be configured using the following
     configuration parameters:

       rumen.data-types.classname.preserve
         A comma separated list of prefixes that the Anonymizer will
         not anonymize while processing classnames. If
         rumen.data-types.classname.preserve is set to
         'org.apache,com.hadoop.' then classnames starting with
         'org.apache' or 'com.hadoop.' will not be anonymized.

       rumen.datatypes.jobproperties.parsers
         A comma separated list of job properties parsers. These
         parsers decide how the job configuration parameters (i.e.
         <key,value> pairs) should be processed. Default is
         MapReduceJobPropertiesParser. The default parser will only
         parse framework-level MapReduce-specific job configuration
         properties. Users can add custom parsers by implementing the
         JobPropertiesParser interface. Rumen also provides an
         all-pass (i.e. no-filter) parser called
         DefaultJobPropertiesParser.

       rumen.anonymization.states.dir
         Set this to a location (on LocalFileSystem or HDFS) to
         enable state persistence and/or reload. This parameter is
         not set by default. Reloading and persistence of states
         depend on the state directory. Note that the state directory
         will contain the latest as well as previous states.

       rumen.anonymization.states.persist
         Set this to 'true' to persist the current state. Default
         value is 'false'. Note that the states will be persisted to
         the state manager's state directory specified using the
         rumen.anonymization.states.dir parameter.

       rumen.anonymization.states.reload
         Set this to 'true' to enable reuse of previously persisted
         state. The default value is 'false'. The previously
         persisted state will be reloaded from the state manager's
         state directory specified using the
         rumen.anonymization.states.dir parameter. Note that the
         Anonymizer will bail out if it fails to find any previously
         persisted state in the state directory or if the state
         directory is not set. If the user wishes to retain/reuse the
         states across multiple invocations of the Anonymizer, then
         the very first invocation of the Anonymizer should have
         rumen.anonymization.states.reload set to 'false' and
         rumen.anonymization.states.persist set to 'true'. Subsequent
         invocations of the Anonymizer can then have
         rumen.anonymization.states.reload set to 'true'.
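     Since the Anonymizer is launched as a plain java class, a
     minimal sketch of how these parameters could be supplied is a
     Hadoop configuration file on the Anonymizer's classpath. The
     property names come from the table above; the state directory
     path is hypothetical:

       <configuration>
         <property>
           <name>rumen.anonymization.states.dir</name>
           <value>hdfs:///user/alice/rumen-states</value>
         </property>
         <property>
           <name>rumen.anonymization.states.persist</name>
           <value>true</value>
         </property>
         <property>
           <name>rumen.anonymization.states.reload</name>
           <value>false</value>
         </property>
       </configuration>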
     Example

       java org.apache.hadoop.tools.rumen.Anonymizer
           -trace file:///home/user/job-trace.json file:///home/user/job-trace-anonymized.json
           -topology file:///home/user/cluster-topology.json file:///home/user/cluster-topology-anonymized.json

     This will anonymize the job details from
     file:///home/user/job-trace.json and output it to
     file:///home/user/job-trace-anonymized.json. It will also
     anonymize the cluster topology layout from
     file:///home/user/cluster-topology.json and output it to
     file:///home/user/cluster-topology-anonymized.json. Note that
     the Anonymizer also supports input and output files on HDFS.
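     The state persistence and reload lifecycle described above can
     be illustrated with two invocations. This is a sketch: it
     assumes the configuration shown earlier is on the classpath, and
     the trace file names are placeholders.

       # First run: rumen.anonymization.states.persist=true,
       # rumen.anonymization.states.reload=false; this run seeds the
       # state directory.
       java org.apache.hadoop.tools.rumen.Anonymizer \
           -trace file:///home/user/day1-trace.json \
           file:///home/user/day1-trace-anonymized.json

       # Later runs: rumen.anonymization.states.reload=true, so the
       # persisted state is reused and recurring fields are
       # anonymized consistently across traces.
       java org.apache.hadoop.tools.rumen.Anonymizer \
           -trace file:///home/user/day2-trace.json \
           file:///home/user/day2-trace-anonymized.json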
