From c2fe4a7e83775589481210383f62faa2dcb00ecc Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Fri, 16 Dec 2011 01:51:15 +0000 Subject: [PATCH 1/9] MAPREDUCE-3487. Fixed JobHistory web-UI to display links to single task's counters' page. Contributed by Jason Lowe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1215016 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 ++ .../v2/app/webapp/CountersBlock.java | 2 +- .../v2/app/webapp/SingleCounterBlock.java | 6 ++-- .../mapreduce/v2/app/webapp/TestAMWebApp.java | 29 ++++++++++++++++++- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 041e83ff66..919bb08bdc 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -314,6 +314,9 @@ Release 0.23.1 - Unreleased MAPREDUCE-3560. TestRMNodeTransitions is failing on trunk. (Siddharth Seth via mahadev) + MAPREDUCE-3487. Fixed JobHistory web-UI to display links to single task's + counters' page. (Jason Lowe via vinodkv) + Release 0.23.0 - 2011-11-01 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java index cf6ab99a93..6accd8add7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java @@ -120,7 +120,7 @@ public class CountersBlock extends HtmlBlock { // Ditto TR>>>>>>> groupRow = group. tr(); - if (mg == null && rg == null) { + if (task == null && mg == null && rg == null) { groupRow.td().$title(counter.getName())._(counter.getDisplayName()). _(); } else { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java index 1ec774e3fb..bb72822542 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java @@ -24,6 +24,7 @@ import org.apache.hadoop.mapreduce.v2.api.records.Counter; import org.apache.hadoop.mapreduce.v2.api.records.CounterGroup; +import org.apache.hadoop.mapreduce.v2.api.records.Counters; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; @@ -120,8 +121,9 @@ private void populateMembers(AppContext ctx) { for(Map.Entry entry : task.getAttempts().entrySet()) { long value = 0; - CounterGroup group = entry.getValue().getCounters() - .getCounterGroup($(COUNTER_GROUP)); + Counters counters = entry.getValue().getCounters(); + CounterGroup group = (counters != null) + ? 
counters.getCounterGroup($(COUNTER_GROUP)) : null; if(group != null) { Counter c = group.getCounter($(COUNTER_NAME)); if(c != null) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java index 745eedcb86..691ff657cd 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java @@ -31,6 +31,7 @@ import org.apache.hadoop.mapreduce.v2.app.MockJobs; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.Task; +import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -153,7 +154,7 @@ public static Map getTaskParams(AppContext appContext) { e.getValue().getType(); Map params = new HashMap(); params.put(AMParams.JOB_ID, MRApps.toString(jobId)); - params.put(AMParams.TASK_ID, e.getKey().toString()); + params.put(AMParams.TASK_ID, MRApps.toString(e.getKey())); params.put(AMParams.TASK_TYPE, MRApps.taskSymbol(e.getValue().getType())); return params; } @@ -179,6 +180,32 @@ public static Map getTaskParams(AppContext appContext) { WebAppTests.testPage(SingleCounterPage.class, AppContext.class, appContext, params); } + + @Test public void testTaskCountersView() { + AppContext appContext = new TestAppContext(); + Map params = getTaskParams(appContext); + WebAppTests.testPage(CountersPage.class, AppContext.class, + appContext, params); + } + + @Test public void testSingleTaskCounterView() { + AppContext appContext = new TestAppContext(0, 1, 1, 2); + Map params = getTaskParams(appContext); + params.put(AMParams.COUNTER_GROUP, + "org.apache.hadoop.mapreduce.FileSystemCounter"); + params.put(AMParams.COUNTER_NAME, "HDFS_WRITE_OPS"); + + // remove counters from one task attempt + // to test handling of missing counters + TaskId taskID = MRApps.toTaskID(params.get(AMParams.TASK_ID)); + Job job = appContext.getJob(taskID.getJobId()); + Task task = job.getTask(taskID); + TaskAttempt attempt = task.getAttempts().values().iterator().next(); + attempt.getReport().setCounters(null); + + WebAppTests.testPage(SingleCounterPage.class, AppContext.class, + appContext, params); + } public static void main(String[] args) { WebApps.$for("yarn", AppContext.class, new TestAppContext(0, 8, 88, 4)). From f73bd5402e49ae6ed712eea70bb3a76314f0a695 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Fri, 16 Dec 2011 02:09:00 +0000 Subject: [PATCH 2/9] MAPREDUCE-3564. Fixed failures in TestStagingCleanup and TestJobEndNotifier tests. Contributed by Siddharth Seth. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1215022 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../hadoop/mapreduce/v2/app/MRAppMaster.java | 20 ++++++++++--------- .../mapreduce/v2/app/TestJobEndNotifier.java | 4 ++-- .../mapreduce/v2/app/TestStagingCleanup.java | 1 + 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 919bb08bdc..406d365339 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -317,6 +317,9 @@ Release 0.23.1 - Unreleased MAPREDUCE-3487. Fixed JobHistory web-UI to display links to single task's counters' page. (Jason Lowe via vinodkv) + MAPREDUCE-3564. Fixed failures in TestStagingCleanup and TestJobEndNotifier + tests. (Siddharth Seth via vinodkv) + Release 0.23.0 - 2011-11-01 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index 33c1fd3cc0..5c2e0fd0c8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -375,15 +375,17 @@ public void handle(JobFinishEvent event) { // this is the only job, so shut down the Appmaster // note in a workflow scenario, this may lead to creation of a new // job (FIXME?) - try { - LOG.info("Job end notification started for jobID : " - + job.getReport().getJobId()); - JobEndNotifier notifier = new JobEndNotifier(); - notifier.setConf(getConfig()); - notifier.notify(job.getReport()); - } catch (InterruptedException ie) { - LOG.warn("Job end notification interrupted for jobID : " - + job.getReport().getJobId(), ie ); + if (getConfig().get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL) != null) { + try { + LOG.info("Job end notification started for jobID : " + + job.getReport().getJobId()); + JobEndNotifier notifier = new JobEndNotifier(); + notifier.setConf(getConfig()); + notifier.notify(job.getReport()); + } catch (InterruptedException ie) { + LOG.warn("Job end notification interrupted for jobID : " + + job.getReport().getJobId(), ie); + } } // TODO:currently just wait for some time so clients can know the diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java index 3cf6ea9c70..52ca1cf3f8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java @@ -96,8 +96,8 @@ public void testNotifyRetries() throws InterruptedException { conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL, "http://nonexistent"); conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3"); conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3"); - 
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3"); - conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3"); + conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000"); + conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000"); JobReport jobReport = Mockito.mock(JobReport.class); long startTime = System.currentTimeMillis(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java index 5146acb599..e0dbac97b6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java @@ -73,6 +73,7 @@ public void testDeletionofStaging() throws IOException { JobId jobid = recordFactory.newRecordInstance(JobId.class); jobid.setAppId(appId); MRAppMaster appMaster = new TestMRApp(attemptId); + appMaster.init(conf); EventHandler handler = appMaster.createJobFinishEventHandler(); handler.handle(new JobFinishEvent(jobid)); From c4ff89252030e4e89aa428ce92eb81d4667976ab Mon Sep 17 00:00:00 2001 From: Siddharth Seth Date: Fri, 16 Dec 2011 03:10:31 +0000 Subject: [PATCH 3/9] MAPREDUCE-3422. Counter display names are not being picked up. Contributed by Jonathan Eagles) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1215031 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../org/apache/hadoop/mapreduce/FileSystemCounter.properties | 0 .../org/apache/hadoop/mapreduce/JobCounter.properties | 0 .../org/apache/hadoop/mapreduce/TaskCounter.properties | 0 .../mapreduce/lib/input/FileInputFormatCounter.properties | 0 .../mapreduce/lib/output/FileOutputFormatCounter.properties | 0 6 files changed, 3 insertions(+) rename hadoop-mapreduce-project/{src/java => hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources}/org/apache/hadoop/mapreduce/FileSystemCounter.properties (100%) rename hadoop-mapreduce-project/{src/java => hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources}/org/apache/hadoop/mapreduce/JobCounter.properties (100%) rename hadoop-mapreduce-project/{src/java => hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources}/org/apache/hadoop/mapreduce/TaskCounter.properties (100%) rename hadoop-mapreduce-project/{src/java => hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources}/org/apache/hadoop/mapreduce/lib/input/FileInputFormatCounter.properties (100%) rename hadoop-mapreduce-project/{src/java => hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources}/org/apache/hadoop/mapreduce/lib/output/FileOutputFormatCounter.properties (100%) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 406d365339..a851daf472 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -320,6 +320,9 @@ Release 0.23.1 - Unreleased MAPREDUCE-3564. Fixed failures in TestStagingCleanup and TestJobEndNotifier tests. (Siddharth Seth via vinodkv) + MAPREDUCE-3422. Counter display names are not being picked up. 
(Jonathan + Eagles via sseth) + Release 0.23.0 - 2011-11-01 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/FileSystemCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/FileSystemCounter.properties similarity index 100% rename from hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/FileSystemCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/FileSystemCounter.properties diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/JobCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/JobCounter.properties similarity index 100% rename from hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/JobCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/JobCounter.properties diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/TaskCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/TaskCounter.properties similarity index 100% rename from hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/TaskCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/TaskCounter.properties diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormatCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/lib/input/FileInputFormatCounter.properties similarity index 100% rename from hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormatCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/lib/input/FileInputFormatCounter.properties diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormatCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/lib/output/FileOutputFormatCounter.properties similarity index 100% rename from hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormatCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/lib/output/FileOutputFormatCounter.properties From 6d551b83de252dead71d102ec553a819c56294af Mon Sep 17 00:00:00 2001 From: Mahadev Konar Date: Fri, 16 Dec 2011 09:09:28 +0000 Subject: [PATCH 4/9] MAPREDUCE-3366. 
Mapreduce component should use consistent directory structure layout as HDFS/common (Eric Yang via mahadev) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1215065 13f79535-47bb-0310-9956-ffa450edef68 --- .../assemblies/hadoop-mapreduce-dist.xml | 75 +++++++++++++++++-- .../src/main/bin/hadoop-config.sh | 17 +++++ hadoop-dist/pom.xml | 12 +-- hadoop-mapreduce-project/CHANGES.txt | 3 + hadoop-mapreduce-project/INSTALL | 6 +- .../hadoop-yarn/bin/slaves.sh | 2 +- .../bin/{start-all.sh => start-yarn.sh} | 2 +- .../bin/{stop-all.sh => stop-yarn.sh} | 2 +- hadoop-mapreduce-project/hadoop-yarn/bin/yarn | 37 +-------- .../hadoop-yarn/bin/yarn-config.sh | 39 +++++----- .../hadoop-yarn/bin/yarn-daemon.sh | 2 +- .../hadoop-yarn/bin/yarn-daemons.sh | 2 +- .../hadoop-yarn/conf/yarn-env.sh | 4 - .../hadoop/yarn/api/ApplicationConstants.java | 4 +- .../src/site/apt/SingleCluster.apt.vm | 18 +---- hadoop-mapreduce-project/pom.xml | 19 ++--- 16 files changed, 130 insertions(+), 114 deletions(-) rename hadoop-mapreduce-project/hadoop-yarn/bin/{start-all.sh => start-yarn.sh} (97%) rename hadoop-mapreduce-project/hadoop-yarn/bin/{stop-all.sh => stop-yarn.sh} (97%) diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml index 4868590ac4..57f3c66dad 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml @@ -22,7 +22,6 @@ dir false - hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/target/native/target/usr/local/bin @@ -33,7 +32,7 @@ hadoop-yarn/bin bin - * + yarn 0755 @@ -41,17 +40,81 @@ bin bin - * + mapred + + 0755 + + + bin + libexec + + mapred-config.sh + + 0755 + + + hadoop-yarn/bin + libexec + + yarn-config.sh + + 0755 + + + hadoop-yarn/bin + sbin + + yarn-daemon.sh + yarn-daemons.sh + start-yarn.sh + stop-yarn.sh 0755 hadoop-yarn/conf - conf + etc/hadoop **/* + + ${basedir} + /share/doc/hadoop/${hadoop.component} + + *.txt + + + + ${project.build.directory}/webapps + /share/hadoop/${hadoop.component}/webapps + + + ${basedir}/src/main/conf + /share/hadoop/${hadoop.component}/templates + + *-site.xml + + + + ${basedir}/src/main/packages/templates/conf + /share/hadoop/${hadoop.component}/templates/conf + + * + + + + ${basedir}/dev-support/jdiff + /share/hadoop/${hadoop.component}/jdiff + + + ${project.build.directory}/site/jdiff/xml + /share/hadoop/${hadoop.component}/jdiff + + + ${project.build.directory}/site + /share/doc/hadoop/${hadoop.component} + @@ -59,7 +122,7 @@ org.apache.hadoop:hadoop-yarn-server-tests - modules + share/hadoop/${hadoop.component} false false @@ -68,7 +131,7 @@ false - /lib + /share/hadoop/${hadoop.component}/lib org.apache.hadoop:hadoop-common diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh index e53ec737f5..71c9481714 100644 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh @@ -231,6 +231,23 @@ fi CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs'/*' +# put yarn in classpath if present +if [ "$YARN_HOME" = "" ]; then + if [ -d "${HADOOP_PREFIX}/share/hadoop/mapreduce" ]; then + YARN_HOME=$HADOOP_PREFIX + fi +fi + +if [ -d "$YARN_HOME/share/hadoop/mapreduce/webapps" ]; then + CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce +fi + 
+if [ -d "$YARN_HOME/share/hadoop/mapreduce/lib" ]; then + CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce/lib'/*' +fi + +CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce'/*' + # cygwin path translation if $cygwin; then HADOOP_HDFS_HOME=`cygpath -w "$HADOOP_HDFS_HOME"` diff --git a/hadoop-dist/pom.xml b/hadoop-dist/pom.xml index ed6b729a93..93fe32be25 100644 --- a/hadoop-dist/pom.xml +++ b/hadoop-dist/pom.xml @@ -76,6 +76,9 @@ dist false + + tar|rpm|deb + @@ -114,15 +117,6 @@ run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${project.version}/* . run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs-httpfs/target/hadoop-hdfs-httpfs-${project.version}/* . run cp -r $ROOT/hadoop-mapreduce-project/target/hadoop-mapreduce-${project.version}/* . - COMMON_LIB=share/hadoop/common/lib - MODULES=../../../../modules - run ln -s $MODULES/hadoop-mapreduce-client-app-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-yarn-api-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-mapreduce-client-common-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-yarn-common-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-mapreduce-client-core-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-yarn-server-common-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-mapreduce-client-jobclient-${project.version}.jar $COMMON_LIB echo echo "Hadoop dist layout available at: ${project.build.directory}/hadoop-${project.version}" echo diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index a851daf472..95a98ff344 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -323,6 +323,9 @@ Release 0.23.1 - Unreleased MAPREDUCE-3422. Counter display names are not being picked up. (Jonathan Eagles via sseth) + MAPREDUCE-3366. Mapreduce component should use consistent directory structure + layout as HDFS/common (Eric Yang via mahadev) + Release 0.23.0 - 2011-11-01 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/INSTALL b/hadoop-mapreduce-project/INSTALL index e6de8cb92e..e75b2aff2f 100644 --- a/hadoop-mapreduce-project/INSTALL +++ b/hadoop-mapreduce-project/INSTALL @@ -55,11 +55,11 @@ Step 8) Modify mapred-site.xml to use yarn framework Step 9) cd $YARN_HOME -Step 10) bin/yarn-daemon.sh start resourcemanager +Step 10) sbin/yarn-daemon.sh start resourcemanager -Step 11) bin/yarn-daemon.sh start nodemanager +Step 11) sbin/yarn-daemon.sh start nodemanager -Step 12) bin/yarn-daemon.sh start historyserver +Step 12) sbin/yarn-daemon.sh start historyserver Step 13) You are all set, an example on how to run a mapreduce job is: cd $HADOOP_MAPRED_HOME diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/slaves.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/slaves.sh index ee83477901..ee254603d6 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/slaves.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/slaves.sh @@ -38,7 +38,7 @@ fi bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . 
$HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/start-all.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/start-yarn.sh similarity index 97% rename from hadoop-mapreduce-project/hadoop-yarn/bin/start-all.sh rename to hadoop-mapreduce-project/hadoop-yarn/bin/start-yarn.sh index fa4fcf3d0d..ccd63a4478 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/start-all.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/start-yarn.sh @@ -23,7 +23,7 @@ echo "starting yarn daemons" bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/stop-all.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/stop-yarn.sh similarity index 97% rename from hadoop-mapreduce-project/hadoop-yarn/bin/stop-all.sh rename to hadoop-mapreduce-project/hadoop-yarn/bin/stop-yarn.sh index 546b67f5c9..c10d1ce7d1 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/stop-all.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/stop-yarn.sh @@ -23,7 +23,7 @@ echo "stopping yarn daemons" bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn index b8e23a97f5..f5c8c1f8e8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn @@ -44,7 +44,7 @@ bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/yarn-config.sh @@ -109,8 +109,7 @@ if [ ! -d "$HADOOP_CONF_DIR" ]; then exit 1 fi -CLASSPATH="${HADOOP_CONF_DIR}:${YARN_CONF_DIR}" -CLASSPATH=${CLASSPATH}:${YARN_CLASSPATH} +CLASSPATH="${HADOOP_CONF_DIR}:${YARN_CONF_DIR}:${CLASSPATH}" CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar # for developers, add Hadoop classes to CLASSPATH @@ -146,38 +145,6 @@ fi # so that filenames w/ spaces are handled correctly in loops below IFS= -# add hadoop-common libs to CLASSPATH -if [ ! -d "$HADOOP_COMMON_HOME" ]; then - if [ -d "$HADOOP_PREFIX" ]; then - export HADOOP_COMMON_HOME=$HADOOP_PREFIX - else - echo No HADOOP_COMMON_HOME set. - echo Please specify it either in yarn-env.sh or in the environment. - exit 1 - fi -fi - -CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/share/hadoop/common'/*' -CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/share/hadoop/common/lib'/*' - -# add hadoop-hdfs libs to CLASSPATH -if [ ! -d "$HADOOP_HDFS_HOME" ]; then - if [ -d "$HADOOP_PREFIX" ]; then - export HADOOP_HDFS_HOME=$HADOOP_PREFIX - else - echo No HADOOP_HDFS_HOME set. - echo Please specify it either in yarn-env.sh or in the environment. 
- exit 1 - fi -fi -CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs'/*' -CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib'/*' - -# add yarn libs to CLASSPATH - -CLASSPATH=${CLASSPATH}:$YARN_HOME/modules'/*' -CLASSPATH=${CLASSPATH}:$YARN_HOME/lib'/*' - # default log directory & file if [ "$YARN_LOG_DIR" = "" ]; then YARN_LOG_DIR="$YARN_HOME/logs" diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh index 4371484b86..2757044273 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh @@ -15,29 +15,24 @@ # included in all the hadoop scripts with source command # should not be executable directly -# also should not be passed any arguments, since we need original $* - -# resolve links - $0 may be a softlink - -this="$0" -while [ -h "$this" ]; do - ls=`ls -ld "$this"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '.*/.*' > /dev/null; then - this="$link" - else - this=`dirname "$this"`/"$link" - fi -done - -# convert relative path to absolute path -bin=`dirname "$this"` -script=`basename "$this"` +bin=`which "$0"` +bin=`dirname "${bin}"` bin=`cd "$bin"; pwd` -this="$bin/$script" -# the root of the Hadoop installation -export YARN_HOME=`dirname "$this"`/.. +export HADOOP_PREFIX="${HADOOP_PREFIX:-$bin/..}" + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then + . ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh +elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then + . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh +elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then + . "$HADOOP_HOME"/libexec/hadoop-config.sh +else + echo "Hadoop common not found." + exit +fi # Same glibc bug that discovered in Hadoop. # Without this you can see very large vmem settings on containers. @@ -56,7 +51,7 @@ then fi # Allow alternate conf dir location. -YARN_CONF_DIR="${YARN_CONF_DIR:-$YARN_HOME/conf}" +YARN_CONF_DIR="${HADOOP_CONF_DIR:-$YARN_HOME/conf}" #check to see it is specified whether to use the slaves or the # masters file diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh index 99fcb0a550..6e41f791c3 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh @@ -39,7 +39,7 @@ fi bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemons.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemons.sh index e34e4ca8b1..aafb42b9b1 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemons.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemons.sh @@ -30,7 +30,7 @@ fi bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . 
$HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/conf/yarn-env.sh b/hadoop-mapreduce-project/hadoop-yarn/conf/yarn-env.sh index b219eddf1a..cfcb250b8e 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/conf/yarn-env.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/conf/yarn-env.sh @@ -41,10 +41,6 @@ if [ "$YARN_HEAPSIZE" != "" ]; then #echo $JAVA_HEAP_MAX fi -# CLASSPATH initially contains $YARN_CONF_DIR -CLASSPATH="${YARN_CONF_DIR}" -CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar - # so that filenames w/ spaces are handled correctly in loops below IFS= diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java index c4d0d78ea5..9439e21cfa 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java @@ -95,8 +95,8 @@ public interface ApplicationConstants { "$HADOOP_COMMON_HOME/share/hadoop/common/lib/*", "$HADOOP_HDFS_HOME/share/hadoop/hdfs/*", "$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*", - "$YARN_HOME/modules/*", - "$YARN_HOME/lib/*" + "$YARN_HOME/share/hadoop/mapreduce/*", + "$YARN_HOME/share/hadoop/mapreduce/lib/*" }; /** diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm index 3d34351708..f4ea1fe69c 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm @@ -171,20 +171,6 @@ Add the following configs to your <<>> +---+ -* Create Symlinks. - - You will have to create the following symlinks: - -+---+ -$ cd $HADOOP_COMMON_HOME/share/hadoop/common/lib/ -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-app-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-jobclient-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-common-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-shuffle-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-core-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-common-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-api-*-SNAPSHOT.jar . -+---+ * Running daemons. Assuming that the environment variables <<$HADOOP_COMMON_HOME>>, <<$HADOOP_HDFS_HOME>>, <<$HADOO_MAPRED_HOME>>, @@ -195,8 +181,8 @@ $ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-api-*-SNAPSHOT.jar . +---+ $ cd $HADOOP_MAPRED_HOME -$ bin/yarn-daemon.sh start resourcemanager -$ bin/yarn-daemon.sh start nodemanager +$ sbin/yarn-daemon.sh start resourcemanager +$ sbin/yarn-daemon.sh start nodemanager +---+ You should be up and running. 
You can run randomwriter as: diff --git a/hadoop-mapreduce-project/pom.xml b/hadoop-mapreduce-project/pom.xml index 74970dd5ee..b9e64473cf 100644 --- a/hadoop-mapreduce-project/pom.xml +++ b/hadoop-mapreduce-project/pom.xml @@ -34,6 +34,8 @@ 600000 once ${basedir} + mapreduce + true @@ -321,7 +323,10 @@ - release + dist + + false + @@ -336,16 +341,6 @@ - - - - - dist - - false - - - org.apache.maven.plugins maven-assembly-plugin @@ -367,7 +362,7 @@ - dist + package-mapreduce prepare-package single From a238f931ea7dce0ca620d1798156c84ff77097ff Mon Sep 17 00:00:00 2001 From: Amar Kamat Date: Fri, 16 Dec 2011 14:20:58 +0000 Subject: [PATCH 5/9] MAPREDUCE-778. Rumen Anonymizer. (Amar Kamat and Chris Douglas via amarrk) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1215141 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 1 + hadoop-mapreduce-project/ivy.xml | 7 + .../ivy/libraries.properties | 1 + .../hadoop/mapred/gridmix/GridmixJob.java | 17 +- .../src/documentation/content/xdocs/rumen.xml | 172 +- .../tools/rumen/TestRumenAnonymization.java | 1940 +++++++++++++++++ .../hadoop/tools/rumen/TestRumenFolder.java | 4 - .../tools/rumen/TestRumenJobTraces.java | 4 +- .../apache/hadoop/tools/rumen/Anonymizer.java | 273 +++ .../org/apache/hadoop/tools/rumen/Folder.java | 79 +- .../tools/rumen/HadoopLogsAnalyzer.java | 45 +- .../apache/hadoop/tools/rumen/JobBuilder.java | 25 +- .../tools/rumen/JsonObjectMapperWriter.java | 19 + .../apache/hadoop/tools/rumen/LoggedJob.java | 78 +- .../hadoop/tools/rumen/LoggedLocation.java | 47 +- .../tools/rumen/LoggedNetworkTopology.java | 19 +- .../apache/hadoop/tools/rumen/LoggedTask.java | 11 +- .../hadoop/tools/rumen/LoggedTaskAttempt.java | 47 +- .../apache/hadoop/tools/rumen/ParsedHost.java | 14 +- .../hadoop/tools/rumen/ZombieCluster.java | 7 +- .../apache/hadoop/tools/rumen/ZombieJob.java | 52 +- .../rumen/anonymization/DataAnonymizer.java | 27 + .../tools/rumen/anonymization/WordList.java | 106 + .../WordListAnonymizerUtility.java | 110 + .../rumen/datatypes/AnonymizableDataType.java | 28 + .../tools/rumen/datatypes/ClassName.java | 57 + .../tools/rumen/datatypes/DataType.java | 25 + .../DefaultAnonymizableDataType.java | 67 + .../rumen/datatypes/DefaultDataType.java | 37 + .../tools/rumen/datatypes/FileName.java | 213 ++ .../hadoop/tools/rumen/datatypes/JobName.java | 41 + .../tools/rumen/datatypes/JobProperties.java | 93 + .../tools/rumen/datatypes/NodeName.java | 185 ++ .../tools/rumen/datatypes/QueueName.java | 41 + .../tools/rumen/datatypes/UserName.java | 40 + .../util/DefaultJobPropertiesParser.java | 31 + .../datatypes/util/JobPropertyParser.java | 34 + .../util/MapReduceJobPropertiesParser.java | 227 ++ .../rumen/serializers/BlockingSerializer.java | 36 + .../DefaultAnonymizingRumenSerializer.java | 57 + .../serializers/DefaultRumenSerializer.java | 42 + .../serializers/ObjectStringSerializer.java | 35 + .../hadoop/tools/rumen/state/State.java | 46 + .../tools/rumen/state/StateDeserializer.java | 59 + .../hadoop/tools/rumen/state/StatePool.java | 345 +++ 45 files changed, 4598 insertions(+), 246 deletions(-) create mode 100644 hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenAnonymization.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Anonymizer.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/anonymization/DataAnonymizer.java create mode 100644 
hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/anonymization/WordList.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/anonymization/WordListAnonymizerUtility.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/AnonymizableDataType.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/ClassName.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/DataType.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/DefaultAnonymizableDataType.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/DefaultDataType.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/FileName.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/JobName.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/JobProperties.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/NodeName.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/QueueName.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/UserName.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/util/DefaultJobPropertiesParser.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/util/JobPropertyParser.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/util/MapReduceJobPropertiesParser.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/serializers/BlockingSerializer.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/serializers/DefaultAnonymizingRumenSerializer.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/serializers/DefaultRumenSerializer.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/serializers/ObjectStringSerializer.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/state/State.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/state/StateDeserializer.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/state/StatePool.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 95a98ff344..6d198a745e 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -6,6 +6,7 @@ Trunk (unreleased changes) MAPREDUCE-3545. Remove Avro RPC. (suresh) NEW FEATURES + MAPREDUCE-778. Rumen Anonymizer. (Amar Kamat and Chris Douglas via amarrk) MAPREDUCE-2669. Add new examples for Mean, Median, and Standard Deviation. 
(Plamen Jeliazkov via shv) diff --git a/hadoop-mapreduce-project/ivy.xml b/hadoop-mapreduce-project/ivy.xml index e9b38d077e..e04da7019b 100644 --- a/hadoop-mapreduce-project/ivy.xml +++ b/hadoop-mapreduce-project/ivy.xml @@ -139,6 +139,13 @@ + + + + + diff --git a/hadoop-mapreduce-project/ivy/libraries.properties b/hadoop-mapreduce-project/ivy/libraries.properties index 360c5a9967..93a10282fc 100644 --- a/hadoop-mapreduce-project/ivy/libraries.properties +++ b/hadoop-mapreduce-project/ivy/libraries.properties @@ -81,5 +81,6 @@ wagon-http.version=1.0-beta-2 xmlenc.version=0.52 xerces.version=1.4.4 +jackson.version=1.8.2 yarn.version=0.24.0-SNAPSHOT hadoop-mapreduce.version=0.24.0-SNAPSHOT diff --git a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java index 9b6ed69f57..77ec697872 100644 --- a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java +++ b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java @@ -26,8 +26,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Delayed; import java.util.concurrent.TimeUnit; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.security.PrivilegedExceptionAction; import org.apache.hadoop.conf.Configuration; @@ -49,6 +47,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.tools.rumen.JobStory; +import static org.apache.hadoop.tools.rumen.datatypes.util.MapReduceJobPropertiesParser.extractMaxHeapOpts; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -92,8 +91,6 @@ protected Formatter initialValue() { // configuration key to enable/disable task jvm options static final String GRIDMIX_TASK_JVM_OPTIONS_ENABLE = "gridmix.task.jvm-options.enable"; - private static final Pattern maxHeapPattern = - Pattern.compile("-Xmx[0-9]+[kKmMgGtT]?+"); private static void setJobQueue(Job job, String queue) { if (queue != null) { @@ -225,18 +222,6 @@ private static void configureTaskJVMMaxHeapOptions(Configuration srcConf, } } } - - private static void extractMaxHeapOpts(String javaOptions, - List maxOpts, List others) { - for (String opt : javaOptions.split(" ")) { - Matcher matcher = maxHeapPattern.matcher(opt); - if (matcher.find()) { - maxOpts.add(opt); - } else { - others.add(opt); - } - } - } // Scales the desired job-level configuration parameter. This API makes sure // that the ratio of the job level configuration parameter to the cluster diff --git a/hadoop-mapreduce-project/src/docs/src/documentation/content/xdocs/rumen.xml b/hadoop-mapreduce-project/src/docs/src/documentation/content/xdocs/rumen.xml index 75b97ac5e8..dbe72c56ca 100644 --- a/hadoop-mapreduce-project/src/docs/src/documentation/content/xdocs/rumen.xml +++ b/hadoop-mapreduce-project/src/docs/src/documentation/content/xdocs/rumen.xml @@ -73,6 +73,11 @@ computed for the total number of successful tasks for every attempt. +
  • Anonymized traces enable sharing of production traces of large-scale Hadoop deployments. Sharing of traces will foster collaboration within the Hadoop community. They can also be used to supplement interesting research findings.
  • @@ -102,6 +107,11 @@ Increasing the trace runtime might involve adding some dummy jobs to the resulting trace and scaling up the runtime of individual jobs. +
  • Anonymizer: A utility to anonymize Hadoop job and cluster topology traces by masking certain sensitive fields but retaining important workload characteristics.
  • @@ -128,10 +138,11 @@ output-duration, concentration etc. -

    Rumen provides 2 basic commands

    +

    Rumen provides 3 basic commands

    • TraceBuilder
    • Folder
    • Anonymizer

    Firstly, we need to generate the Gold Trace. Hence the first @@ -139,8 +150,9 @@ The output of the TraceBuilder is a job-trace file (and an optional cluster-topology file). In case we want to scale the output, we can use the Folder utility to fold the current trace to the - desired length. The remaining part of this section explains these - utilities in detail. + desired length. For anonymizing the trace, use the + Anonymizer utility. The remaining part of this section + explains these utilities in detail.
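    For orientation, the end-to-end flow typically looks like the sketch below. The paths, the job-history input location, and the folding duration are illustrative placeholders rather than part of the original examples; see the TraceBuilder and Folder sections of this document for the exact options.

    java org.apache.hadoop.tools.rumen.TraceBuilder file:///home/user/job-trace.json file:///home/user/cluster-topology.json <jobhistory-input>
    java org.apache.hadoop.tools.rumen.Folder -output-duration 1h file:///home/user/job-trace.json file:///home/user/folded-trace.json
    java org.apache.hadoop.tools.rumen.Anonymizer -trace file:///home/user/folded-trace.json file:///home/user/job-trace-anonymized.json -topology file:///home/user/cluster-topology.json file:///home/user/cluster-topology-anonymized.json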

    Examples in this section assumes that certain libraries are present @@ -426,8 +438,156 @@

    +

    +

    +

    +

    +

    + + + + +
    Anonymizer

    Command:

    java org.apache.hadoop.tools.rumen.Anonymizer [options] [-trace <jobtrace-input> <jobtrace-output>] [-topology <topology-input> <topology-output>]

    This command invokes the Anonymizer utility of Rumen. It anonymizes sensitive information from the <jobtrace-input> file and outputs the anonymized content into the <jobtrace-output> file. It also anonymizes the cluster layout (topology) from the <topology-input> file and outputs it in the <topology-output> file. <jobtrace-input> represents the job trace file obtained using TraceBuilder or Folder. <topology-input> represents the cluster topology file obtained using TraceBuilder.


    Options:

    Parameter    Description                    Notes
    -trace       Anonymizes job traces.         Anonymizes sensitive fields like user-name, job-name,
                                                queue-name, host-names, job configuration parameters etc.
    -topology    Anonymizes cluster topology.   Anonymizes rack-names and host-names.
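    Since the command syntax above marks both options as optional, a job trace can also be anonymized without a topology file (and vice versa). An illustrative invocation with placeholder paths:

    java org.apache.hadoop.tools.rumen.Anonymizer -trace file:///home/user/job-trace.json file:///home/user/job-trace-anonymized.json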
    Anonymizer Configuration Parameters

    The Rumen anonymizer can be configured using the following configuration parameters:

    rumen.data-types.classname.preserve
        A comma-separated list of prefixes that the Anonymizer will not anonymize while
        processing classnames. If rumen.data-types.classname.preserve is set to
        'org.apache,com.hadoop.' then classnames starting with 'org.apache' or
        'com.hadoop.' will not be anonymized.

    rumen.datatypes.jobproperties.parsers
        A comma-separated list of job properties parsers. These parsers decide how the job
        configuration parameters (i.e. <key,value> pairs) should be processed. Default is
        MapReduceJobPropertiesParser. The default parser will only parse framework-level
        MapReduce-specific job configuration properties. Users can add custom parsers by
        implementing the JobPropertyParser interface. Rumen also provides an all-pass
        (i.e. no filter) parser called DefaultJobPropertiesParser.

    rumen.anonymization.states.dir
        Set this to a location (on LocalFileSystem or HDFS) for enabling state persistence
        and/or reload. This parameter is not set by default. Reloading and persistence of
        states depend on the state directory. Note that the state directory will contain
        the latest as well as previous states.

    rumen.anonymization.states.persist
        Set this to 'true' to persist the current state. Default value is 'false'. Note
        that the states will be persisted to the state manager's state directory specified
        using the rumen.anonymization.states.dir parameter.

    rumen.anonymization.states.reload
        Set this to 'true' to enable reuse of previously persisted state. The default value
        is 'false'. The previously persisted state will be reloaded from the state manager's
        state directory specified using the rumen.anonymization.states.dir parameter. Note
        that the Anonymizer will bail out if it fails to find any previously persisted state
        in the state directory or if the state directory is not set. If the user wishes to
        retain/reuse the states across multiple invocations of the Anonymizer, then the very
        first invocation of the Anonymizer should have rumen.anonymization.states.reload set
        to 'false' and rumen.anonymization.states.persist set to 'true'. Subsequent
        invocations of the Anonymizer can then have rumen.anonymization.states.reload set
        to 'true'.
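    As a sketch of how the state-related parameters can be supplied, the two invocations below persist the anonymization state on the first run and reload it on a later run, following the rumen.anonymization.states.reload guidance above. This assumes the Anonymizer accepts the standard Hadoop generic options (-D key=value), as Tool-based Rumen utilities typically do; otherwise the same keys can be set in a configuration file on the classpath. The state directory and trace paths are placeholders.

    java org.apache.hadoop.tools.rumen.Anonymizer -Drumen.anonymization.states.dir=hdfs:///user/alice/rumen-state -Drumen.anonymization.states.persist=true -trace file:///home/user/job-trace.json file:///home/user/job-trace-anonymized.json

    java org.apache.hadoop.tools.rumen.Anonymizer -Drumen.anonymization.states.dir=hdfs:///user/alice/rumen-state -Drumen.anonymization.states.reload=true -Drumen.anonymization.states.persist=true -trace file:///home/user/job-trace-week2.json file:///home/user/job-trace-week2-anonymized.json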
    Example

    java org.apache.hadoop.tools.rumen.Anonymizer -trace file:///home/user/job-trace.json file:///home/user/job-trace-anonymized.json -topology file:///home/user/cluster-topology.json file:///home/user/cluster-topology-anonymized.json


    This will anonymize the job details from file:///home/user/job-trace.json and output it to file:///home/user/job-trace-anonymized.json. It will also anonymize the cluster topology layout from file:///home/user/cluster-topology.json and output it to file:///home/user/cluster-topology-anonymized.json. Note that the Anonymizer also supports input and output files on HDFS.
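    For instance, an equivalent run over files stored in HDFS could look like the following (an illustrative command with placeholder HDFS paths):

    java org.apache.hadoop.tools.rumen.Anonymizer -trace hdfs:///user/alice/job-trace.json hdfs:///user/alice/job-trace-anonymized.json -topology hdfs:///user/alice/cluster-topology.json hdfs:///user/alice/cluster-topology-anonymized.json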

    +
    -