From c2fe4a7e83775589481210383f62faa2dcb00ecc Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Fri, 16 Dec 2011 01:51:15 +0000 Subject: [PATCH 1/9] MAPREDUCE-3487. Fixed JobHistory web-UI to display links to single task's counters' page. Contributed by Jason Lowe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1215016 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 ++ .../v2/app/webapp/CountersBlock.java | 2 +- .../v2/app/webapp/SingleCounterBlock.java | 6 ++-- .../mapreduce/v2/app/webapp/TestAMWebApp.java | 29 ++++++++++++++++++- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 041e83ff66..919bb08bdc 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -314,6 +314,9 @@ Release 0.23.1 - Unreleased MAPREDUCE-3560. TestRMNodeTransitions is failing on trunk. (Siddharth Seth via mahadev) + MAPREDUCE-3487. Fixed JobHistory web-UI to display links to single task's + counters' page. (Jason Lowe via vinodkv) + Release 0.23.0 - 2011-11-01 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java index cf6ab99a93..6accd8add7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/CountersBlock.java @@ -120,7 +120,7 @@ public class CountersBlock extends HtmlBlock { // Ditto TR>>>>>>> groupRow = group. tr(); - if (mg == null && rg == null) { + if (task == null && mg == null && rg == null) { groupRow.td().$title(counter.getName())._(counter.getDisplayName()). _(); } else { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java index 1ec774e3fb..bb72822542 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/SingleCounterBlock.java @@ -24,6 +24,7 @@ import org.apache.hadoop.mapreduce.v2.api.records.Counter; import org.apache.hadoop.mapreduce.v2.api.records.CounterGroup; +import org.apache.hadoop.mapreduce.v2.api.records.Counters; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; @@ -120,8 +121,9 @@ private void populateMembers(AppContext ctx) { for(Map.Entry entry : task.getAttempts().entrySet()) { long value = 0; - CounterGroup group = entry.getValue().getCounters() - .getCounterGroup($(COUNTER_GROUP)); + Counters counters = entry.getValue().getCounters(); + CounterGroup group = (counters != null) + ? 
counters.getCounterGroup($(COUNTER_GROUP)) : null; if(group != null) { Counter c = group.getCounter($(COUNTER_NAME)); if(c != null) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java index 745eedcb86..691ff657cd 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java @@ -31,6 +31,7 @@ import org.apache.hadoop.mapreduce.v2.app.MockJobs; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.Task; +import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -153,7 +154,7 @@ public static Map getTaskParams(AppContext appContext) { e.getValue().getType(); Map params = new HashMap(); params.put(AMParams.JOB_ID, MRApps.toString(jobId)); - params.put(AMParams.TASK_ID, e.getKey().toString()); + params.put(AMParams.TASK_ID, MRApps.toString(e.getKey())); params.put(AMParams.TASK_TYPE, MRApps.taskSymbol(e.getValue().getType())); return params; } @@ -179,6 +180,32 @@ public static Map getTaskParams(AppContext appContext) { WebAppTests.testPage(SingleCounterPage.class, AppContext.class, appContext, params); } + + @Test public void testTaskCountersView() { + AppContext appContext = new TestAppContext(); + Map params = getTaskParams(appContext); + WebAppTests.testPage(CountersPage.class, AppContext.class, + appContext, params); + } + + @Test public void testSingleTaskCounterView() { + AppContext appContext = new TestAppContext(0, 1, 1, 2); + Map params = getTaskParams(appContext); + params.put(AMParams.COUNTER_GROUP, + "org.apache.hadoop.mapreduce.FileSystemCounter"); + params.put(AMParams.COUNTER_NAME, "HDFS_WRITE_OPS"); + + // remove counters from one task attempt + // to test handling of missing counters + TaskId taskID = MRApps.toTaskID(params.get(AMParams.TASK_ID)); + Job job = appContext.getJob(taskID.getJobId()); + Task task = job.getTask(taskID); + TaskAttempt attempt = task.getAttempts().values().iterator().next(); + attempt.getReport().setCounters(null); + + WebAppTests.testPage(SingleCounterPage.class, AppContext.class, + appContext, params); + } public static void main(String[] args) { WebApps.$for("yarn", AppContext.class, new TestAppContext(0, 8, 88, 4)). From f73bd5402e49ae6ed712eea70bb3a76314f0a695 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Fri, 16 Dec 2011 02:09:00 +0000 Subject: [PATCH 2/9] MAPREDUCE-3564. Fixed failures in TestStagingCleanup and TestJobEndNotifier tests. Contributed by Siddharth Seth. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1215022 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../hadoop/mapreduce/v2/app/MRAppMaster.java | 20 ++++++++++--------- .../mapreduce/v2/app/TestJobEndNotifier.java | 4 ++-- .../mapreduce/v2/app/TestStagingCleanup.java | 1 + 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 919bb08bdc..406d365339 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -317,6 +317,9 @@ Release 0.23.1 - Unreleased MAPREDUCE-3487. Fixed JobHistory web-UI to display links to single task's counters' page. (Jason Lowe via vinodkv) + MAPREDUCE-3564. Fixed failures in TestStagingCleanup and TestJobEndNotifier + tests. (Siddharth Seth via vinodkv) + Release 0.23.0 - 2011-11-01 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index 33c1fd3cc0..5c2e0fd0c8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -375,15 +375,17 @@ public void handle(JobFinishEvent event) { // this is the only job, so shut down the Appmaster // note in a workflow scenario, this may lead to creation of a new // job (FIXME?) - try { - LOG.info("Job end notification started for jobID : " - + job.getReport().getJobId()); - JobEndNotifier notifier = new JobEndNotifier(); - notifier.setConf(getConfig()); - notifier.notify(job.getReport()); - } catch (InterruptedException ie) { - LOG.warn("Job end notification interrupted for jobID : " - + job.getReport().getJobId(), ie ); + if (getConfig().get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL) != null) { + try { + LOG.info("Job end notification started for jobID : " + + job.getReport().getJobId()); + JobEndNotifier notifier = new JobEndNotifier(); + notifier.setConf(getConfig()); + notifier.notify(job.getReport()); + } catch (InterruptedException ie) { + LOG.warn("Job end notification interrupted for jobID : " + + job.getReport().getJobId(), ie); + } } // TODO:currently just wait for some time so clients can know the diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java index 3cf6ea9c70..52ca1cf3f8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestJobEndNotifier.java @@ -96,8 +96,8 @@ public void testNotifyRetries() throws InterruptedException { conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL, "http://nonexistent"); conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3"); conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3"); - 
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3"); - conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3"); + conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000"); + conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000"); JobReport jobReport = Mockito.mock(JobReport.class); long startTime = System.currentTimeMillis(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java index 5146acb599..e0dbac97b6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java @@ -73,6 +73,7 @@ public void testDeletionofStaging() throws IOException { JobId jobid = recordFactory.newRecordInstance(JobId.class); jobid.setAppId(appId); MRAppMaster appMaster = new TestMRApp(attemptId); + appMaster.init(conf); EventHandler handler = appMaster.createJobFinishEventHandler(); handler.handle(new JobFinishEvent(jobid)); From c4ff89252030e4e89aa428ce92eb81d4667976ab Mon Sep 17 00:00:00 2001 From: Siddharth Seth Date: Fri, 16 Dec 2011 03:10:31 +0000 Subject: [PATCH 3/9] MAPREDUCE-3422. Counter display names are not being picked up. Contributed by Jonathan Eagles) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1215031 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../org/apache/hadoop/mapreduce/FileSystemCounter.properties | 0 .../org/apache/hadoop/mapreduce/JobCounter.properties | 0 .../org/apache/hadoop/mapreduce/TaskCounter.properties | 0 .../mapreduce/lib/input/FileInputFormatCounter.properties | 0 .../mapreduce/lib/output/FileOutputFormatCounter.properties | 0 6 files changed, 3 insertions(+) rename hadoop-mapreduce-project/{src/java => hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources}/org/apache/hadoop/mapreduce/FileSystemCounter.properties (100%) rename hadoop-mapreduce-project/{src/java => hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources}/org/apache/hadoop/mapreduce/JobCounter.properties (100%) rename hadoop-mapreduce-project/{src/java => hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources}/org/apache/hadoop/mapreduce/TaskCounter.properties (100%) rename hadoop-mapreduce-project/{src/java => hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources}/org/apache/hadoop/mapreduce/lib/input/FileInputFormatCounter.properties (100%) rename hadoop-mapreduce-project/{src/java => hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources}/org/apache/hadoop/mapreduce/lib/output/FileOutputFormatCounter.properties (100%) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 406d365339..a851daf472 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -320,6 +320,9 @@ Release 0.23.1 - Unreleased MAPREDUCE-3564. Fixed failures in TestStagingCleanup and TestJobEndNotifier tests. (Siddharth Seth via vinodkv) + MAPREDUCE-3422. Counter display names are not being picked up. 
(Jonathan + Eagles via sseth) + Release 0.23.0 - 2011-11-01 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/FileSystemCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/FileSystemCounter.properties similarity index 100% rename from hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/FileSystemCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/FileSystemCounter.properties diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/JobCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/JobCounter.properties similarity index 100% rename from hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/JobCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/JobCounter.properties diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/TaskCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/TaskCounter.properties similarity index 100% rename from hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/TaskCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/TaskCounter.properties diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormatCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/lib/input/FileInputFormatCounter.properties similarity index 100% rename from hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormatCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/lib/input/FileInputFormatCounter.properties diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormatCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/lib/output/FileOutputFormatCounter.properties similarity index 100% rename from hadoop-mapreduce-project/src/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormatCounter.properties rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/lib/output/FileOutputFormatCounter.properties From 6d551b83de252dead71d102ec553a819c56294af Mon Sep 17 00:00:00 2001 From: Mahadev Konar Date: Fri, 16 Dec 2011 09:09:28 +0000 Subject: [PATCH 4/9] MAPREDUCE-3366. 
Mapreduce component should use consistent directory structure layout as HDFS/common (Eric Yang via mahadev) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1215065 13f79535-47bb-0310-9956-ffa450edef68 --- .../assemblies/hadoop-mapreduce-dist.xml | 75 +++++++++++++++++-- .../src/main/bin/hadoop-config.sh | 17 +++++ hadoop-dist/pom.xml | 12 +-- hadoop-mapreduce-project/CHANGES.txt | 3 + hadoop-mapreduce-project/INSTALL | 6 +- .../hadoop-yarn/bin/slaves.sh | 2 +- .../bin/{start-all.sh => start-yarn.sh} | 2 +- .../bin/{stop-all.sh => stop-yarn.sh} | 2 +- hadoop-mapreduce-project/hadoop-yarn/bin/yarn | 37 +-------- .../hadoop-yarn/bin/yarn-config.sh | 39 +++++----- .../hadoop-yarn/bin/yarn-daemon.sh | 2 +- .../hadoop-yarn/bin/yarn-daemons.sh | 2 +- .../hadoop-yarn/conf/yarn-env.sh | 4 - .../hadoop/yarn/api/ApplicationConstants.java | 4 +- .../src/site/apt/SingleCluster.apt.vm | 18 +---- hadoop-mapreduce-project/pom.xml | 19 ++--- 16 files changed, 130 insertions(+), 114 deletions(-) rename hadoop-mapreduce-project/hadoop-yarn/bin/{start-all.sh => start-yarn.sh} (97%) rename hadoop-mapreduce-project/hadoop-yarn/bin/{stop-all.sh => stop-yarn.sh} (97%) diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml index 4868590ac4..57f3c66dad 100644 --- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml +++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml @@ -22,7 +22,6 @@ dir false - hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/target/native/target/usr/local/bin @@ -33,7 +32,7 @@ hadoop-yarn/bin bin - * + yarn 0755 @@ -41,17 +40,81 @@ bin bin - * + mapred + + 0755 + + + bin + libexec + + mapred-config.sh + + 0755 + + + hadoop-yarn/bin + libexec + + yarn-config.sh + + 0755 + + + hadoop-yarn/bin + sbin + + yarn-daemon.sh + yarn-daemons.sh + start-yarn.sh + stop-yarn.sh 0755 hadoop-yarn/conf - conf + etc/hadoop **/* + + ${basedir} + /share/doc/hadoop/${hadoop.component} + + *.txt + + + + ${project.build.directory}/webapps + /share/hadoop/${hadoop.component}/webapps + + + ${basedir}/src/main/conf + /share/hadoop/${hadoop.component}/templates + + *-site.xml + + + + ${basedir}/src/main/packages/templates/conf + /share/hadoop/${hadoop.component}/templates/conf + + * + + + + ${basedir}/dev-support/jdiff + /share/hadoop/${hadoop.component}/jdiff + + + ${project.build.directory}/site/jdiff/xml + /share/hadoop/${hadoop.component}/jdiff + + + ${project.build.directory}/site + /share/doc/hadoop/${hadoop.component} + @@ -59,7 +122,7 @@ org.apache.hadoop:hadoop-yarn-server-tests - modules + share/hadoop/${hadoop.component} false false @@ -68,7 +131,7 @@ false - /lib + /share/hadoop/${hadoop.component}/lib org.apache.hadoop:hadoop-common diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh index e53ec737f5..71c9481714 100644 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh @@ -231,6 +231,23 @@ fi CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs'/*' +# put yarn in classpath if present +if [ "$YARN_HOME" = "" ]; then + if [ -d "${HADOOP_PREFIX}/share/hadoop/mapreduce" ]; then + YARN_HOME=$HADOOP_PREFIX + fi +fi + +if [ -d "$YARN_HOME/share/hadoop/mapreduce/webapps" ]; then + CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce +fi + 
+if [ -d "$YARN_HOME/share/hadoop/mapreduce/lib" ]; then + CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce/lib'/*' +fi + +CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce'/*' + # cygwin path translation if $cygwin; then HADOOP_HDFS_HOME=`cygpath -w "$HADOOP_HDFS_HOME"` diff --git a/hadoop-dist/pom.xml b/hadoop-dist/pom.xml index ed6b729a93..93fe32be25 100644 --- a/hadoop-dist/pom.xml +++ b/hadoop-dist/pom.xml @@ -76,6 +76,9 @@ dist false + + tar|rpm|deb + @@ -114,15 +117,6 @@ run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs/target/hadoop-hdfs-${project.version}/* . run cp -r $ROOT/hadoop-hdfs-project/hadoop-hdfs-httpfs/target/hadoop-hdfs-httpfs-${project.version}/* . run cp -r $ROOT/hadoop-mapreduce-project/target/hadoop-mapreduce-${project.version}/* . - COMMON_LIB=share/hadoop/common/lib - MODULES=../../../../modules - run ln -s $MODULES/hadoop-mapreduce-client-app-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-yarn-api-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-mapreduce-client-common-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-yarn-common-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-mapreduce-client-core-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-yarn-server-common-${project.version}.jar $COMMON_LIB - run ln -s $MODULES/hadoop-mapreduce-client-jobclient-${project.version}.jar $COMMON_LIB echo echo "Hadoop dist layout available at: ${project.build.directory}/hadoop-${project.version}" echo diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index a851daf472..95a98ff344 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -323,6 +323,9 @@ Release 0.23.1 - Unreleased MAPREDUCE-3422. Counter display names are not being picked up. (Jonathan Eagles via sseth) + MAPREDUCE-3366. Mapreduce component should use consistent directory structure + layout as HDFS/common (Eric Yang via mahadev) + Release 0.23.0 - 2011-11-01 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/INSTALL b/hadoop-mapreduce-project/INSTALL index e6de8cb92e..e75b2aff2f 100644 --- a/hadoop-mapreduce-project/INSTALL +++ b/hadoop-mapreduce-project/INSTALL @@ -55,11 +55,11 @@ Step 8) Modify mapred-site.xml to use yarn framework Step 9) cd $YARN_HOME -Step 10) bin/yarn-daemon.sh start resourcemanager +Step 10) sbin/yarn-daemon.sh start resourcemanager -Step 11) bin/yarn-daemon.sh start nodemanager +Step 11) sbin/yarn-daemon.sh start nodemanager -Step 12) bin/yarn-daemon.sh start historyserver +Step 12) sbin/yarn-daemon.sh start historyserver Step 13) You are all set, an example on how to run a mapreduce job is: cd $HADOOP_MAPRED_HOME diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/slaves.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/slaves.sh index ee83477901..ee254603d6 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/slaves.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/slaves.sh @@ -38,7 +38,7 @@ fi bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . 
$HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/start-all.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/start-yarn.sh similarity index 97% rename from hadoop-mapreduce-project/hadoop-yarn/bin/start-all.sh rename to hadoop-mapreduce-project/hadoop-yarn/bin/start-yarn.sh index fa4fcf3d0d..ccd63a4478 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/start-all.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/start-yarn.sh @@ -23,7 +23,7 @@ echo "starting yarn daemons" bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/stop-all.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/stop-yarn.sh similarity index 97% rename from hadoop-mapreduce-project/hadoop-yarn/bin/stop-all.sh rename to hadoop-mapreduce-project/hadoop-yarn/bin/stop-yarn.sh index 546b67f5c9..c10d1ce7d1 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/stop-all.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/stop-yarn.sh @@ -23,7 +23,7 @@ echo "stopping yarn daemons" bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn index b8e23a97f5..f5c8c1f8e8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn @@ -44,7 +44,7 @@ bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/yarn-config.sh @@ -109,8 +109,7 @@ if [ ! -d "$HADOOP_CONF_DIR" ]; then exit 1 fi -CLASSPATH="${HADOOP_CONF_DIR}:${YARN_CONF_DIR}" -CLASSPATH=${CLASSPATH}:${YARN_CLASSPATH} +CLASSPATH="${HADOOP_CONF_DIR}:${YARN_CONF_DIR}:${CLASSPATH}" CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar # for developers, add Hadoop classes to CLASSPATH @@ -146,38 +145,6 @@ fi # so that filenames w/ spaces are handled correctly in loops below IFS= -# add hadoop-common libs to CLASSPATH -if [ ! -d "$HADOOP_COMMON_HOME" ]; then - if [ -d "$HADOOP_PREFIX" ]; then - export HADOOP_COMMON_HOME=$HADOOP_PREFIX - else - echo No HADOOP_COMMON_HOME set. - echo Please specify it either in yarn-env.sh or in the environment. - exit 1 - fi -fi - -CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/share/hadoop/common'/*' -CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/share/hadoop/common/lib'/*' - -# add hadoop-hdfs libs to CLASSPATH -if [ ! -d "$HADOOP_HDFS_HOME" ]; then - if [ -d "$HADOOP_PREFIX" ]; then - export HADOOP_HDFS_HOME=$HADOOP_PREFIX - else - echo No HADOOP_HDFS_HOME set. - echo Please specify it either in yarn-env.sh or in the environment. 
- exit 1 - fi -fi -CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs'/*' -CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib'/*' - -# add yarn libs to CLASSPATH - -CLASSPATH=${CLASSPATH}:$YARN_HOME/modules'/*' -CLASSPATH=${CLASSPATH}:$YARN_HOME/lib'/*' - # default log directory & file if [ "$YARN_LOG_DIR" = "" ]; then YARN_LOG_DIR="$YARN_HOME/logs" diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh index 4371484b86..2757044273 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh @@ -15,29 +15,24 @@ # included in all the hadoop scripts with source command # should not be executable directly -# also should not be passed any arguments, since we need original $* - -# resolve links - $0 may be a softlink - -this="$0" -while [ -h "$this" ]; do - ls=`ls -ld "$this"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '.*/.*' > /dev/null; then - this="$link" - else - this=`dirname "$this"`/"$link" - fi -done - -# convert relative path to absolute path -bin=`dirname "$this"` -script=`basename "$this"` +bin=`which "$0"` +bin=`dirname "${bin}"` bin=`cd "$bin"; pwd` -this="$bin/$script" -# the root of the Hadoop installation -export YARN_HOME=`dirname "$this"`/.. +export HADOOP_PREFIX="${HADOOP_PREFIX:-$bin/..}" + +DEFAULT_LIBEXEC_DIR="$bin"/../libexec +HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} +if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then + . ${HADOOP_LIBEXEC_DIR}/hadoop-config.sh +elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then + . "$HADOOP_COMMON_HOME"/libexec/hadoop-config.sh +elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then + . "$HADOOP_HOME"/libexec/hadoop-config.sh +else + echo "Hadoop common not found." + exit +fi # Same glibc bug that discovered in Hadoop. # Without this you can see very large vmem settings on containers. @@ -56,7 +51,7 @@ then fi # Allow alternate conf dir location. -YARN_CONF_DIR="${YARN_CONF_DIR:-$YARN_HOME/conf}" +YARN_CONF_DIR="${HADOOP_CONF_DIR:-$YARN_HOME/conf}" #check to see it is specified whether to use the slaves or the # masters file diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh index 99fcb0a550..6e41f791c3 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemon.sh @@ -39,7 +39,7 @@ fi bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . $HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemons.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemons.sh index e34e4ca8b1..aafb42b9b1 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemons.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-daemons.sh @@ -30,7 +30,7 @@ fi bin=`dirname "${BASH_SOURCE-$0}"` bin=`cd "$bin"; pwd` -DEFAULT_LIBEXEC_DIR="$bin" +DEFAULT_LIBEXEC_DIR="$bin"/../libexec HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR} . 
$HADOOP_LIBEXEC_DIR/yarn-config.sh diff --git a/hadoop-mapreduce-project/hadoop-yarn/conf/yarn-env.sh b/hadoop-mapreduce-project/hadoop-yarn/conf/yarn-env.sh index b219eddf1a..cfcb250b8e 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/conf/yarn-env.sh +++ b/hadoop-mapreduce-project/hadoop-yarn/conf/yarn-env.sh @@ -41,10 +41,6 @@ if [ "$YARN_HEAPSIZE" != "" ]; then #echo $JAVA_HEAP_MAX fi -# CLASSPATH initially contains $YARN_CONF_DIR -CLASSPATH="${YARN_CONF_DIR}" -CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar - # so that filenames w/ spaces are handled correctly in loops below IFS= diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java index c4d0d78ea5..9439e21cfa 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java @@ -95,8 +95,8 @@ public interface ApplicationConstants { "$HADOOP_COMMON_HOME/share/hadoop/common/lib/*", "$HADOOP_HDFS_HOME/share/hadoop/hdfs/*", "$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*", - "$YARN_HOME/modules/*", - "$YARN_HOME/lib/*" + "$YARN_HOME/share/hadoop/mapreduce/*", + "$YARN_HOME/share/hadoop/mapreduce/lib/*" }; /** diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm index 3d34351708..f4ea1fe69c 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm @@ -171,20 +171,6 @@ Add the following configs to your <<>> +---+ -* Create Symlinks. - - You will have to create the following symlinks: - -+---+ -$ cd $HADOOP_COMMON_HOME/share/hadoop/common/lib/ -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-app-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-jobclient-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-common-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-shuffle-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-core-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-common-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-api-*-SNAPSHOT.jar . -+---+ * Running daemons. Assuming that the environment variables <<$HADOOP_COMMON_HOME>>, <<$HADOOP_HDFS_HOME>>, <<$HADOO_MAPRED_HOME>>, @@ -195,8 +181,8 @@ $ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-api-*-SNAPSHOT.jar . +---+ $ cd $HADOOP_MAPRED_HOME -$ bin/yarn-daemon.sh start resourcemanager -$ bin/yarn-daemon.sh start nodemanager +$ sbin/yarn-daemon.sh start resourcemanager +$ sbin/yarn-daemon.sh start nodemanager +---+ You should be up and running. 
You can run randomwriter as: diff --git a/hadoop-mapreduce-project/pom.xml b/hadoop-mapreduce-project/pom.xml index 74970dd5ee..b9e64473cf 100644 --- a/hadoop-mapreduce-project/pom.xml +++ b/hadoop-mapreduce-project/pom.xml @@ -34,6 +34,8 @@ 600000 once ${basedir} + mapreduce + true @@ -321,7 +323,10 @@ - release + dist + + false + @@ -336,16 +341,6 @@ - - - - - dist - - false - - - org.apache.maven.plugins maven-assembly-plugin @@ -367,7 +362,7 @@ - dist + package-mapreduce prepare-package single From a238f931ea7dce0ca620d1798156c84ff77097ff Mon Sep 17 00:00:00 2001 From: Amar Kamat Date: Fri, 16 Dec 2011 14:20:58 +0000 Subject: [PATCH 5/9] MAPREDUCE-778. Rumen Anonymizer. (Amar Kamat and Chris Douglas via amarrk) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1215141 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 1 + hadoop-mapreduce-project/ivy.xml | 7 + .../ivy/libraries.properties | 1 + .../hadoop/mapred/gridmix/GridmixJob.java | 17 +- .../src/documentation/content/xdocs/rumen.xml | 172 +- .../tools/rumen/TestRumenAnonymization.java | 1940 +++++++++++++++++ .../hadoop/tools/rumen/TestRumenFolder.java | 4 - .../tools/rumen/TestRumenJobTraces.java | 4 +- .../apache/hadoop/tools/rumen/Anonymizer.java | 273 +++ .../org/apache/hadoop/tools/rumen/Folder.java | 79 +- .../tools/rumen/HadoopLogsAnalyzer.java | 45 +- .../apache/hadoop/tools/rumen/JobBuilder.java | 25 +- .../tools/rumen/JsonObjectMapperWriter.java | 19 + .../apache/hadoop/tools/rumen/LoggedJob.java | 78 +- .../hadoop/tools/rumen/LoggedLocation.java | 47 +- .../tools/rumen/LoggedNetworkTopology.java | 19 +- .../apache/hadoop/tools/rumen/LoggedTask.java | 11 +- .../hadoop/tools/rumen/LoggedTaskAttempt.java | 47 +- .../apache/hadoop/tools/rumen/ParsedHost.java | 14 +- .../hadoop/tools/rumen/ZombieCluster.java | 7 +- .../apache/hadoop/tools/rumen/ZombieJob.java | 52 +- .../rumen/anonymization/DataAnonymizer.java | 27 + .../tools/rumen/anonymization/WordList.java | 106 + .../WordListAnonymizerUtility.java | 110 + .../rumen/datatypes/AnonymizableDataType.java | 28 + .../tools/rumen/datatypes/ClassName.java | 57 + .../tools/rumen/datatypes/DataType.java | 25 + .../DefaultAnonymizableDataType.java | 67 + .../rumen/datatypes/DefaultDataType.java | 37 + .../tools/rumen/datatypes/FileName.java | 213 ++ .../hadoop/tools/rumen/datatypes/JobName.java | 41 + .../tools/rumen/datatypes/JobProperties.java | 93 + .../tools/rumen/datatypes/NodeName.java | 185 ++ .../tools/rumen/datatypes/QueueName.java | 41 + .../tools/rumen/datatypes/UserName.java | 40 + .../util/DefaultJobPropertiesParser.java | 31 + .../datatypes/util/JobPropertyParser.java | 34 + .../util/MapReduceJobPropertiesParser.java | 227 ++ .../rumen/serializers/BlockingSerializer.java | 36 + .../DefaultAnonymizingRumenSerializer.java | 57 + .../serializers/DefaultRumenSerializer.java | 42 + .../serializers/ObjectStringSerializer.java | 35 + .../hadoop/tools/rumen/state/State.java | 46 + .../tools/rumen/state/StateDeserializer.java | 59 + .../hadoop/tools/rumen/state/StatePool.java | 345 +++ 45 files changed, 4598 insertions(+), 246 deletions(-) create mode 100644 hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenAnonymization.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Anonymizer.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/anonymization/DataAnonymizer.java create mode 100644 
hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/anonymization/WordList.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/anonymization/WordListAnonymizerUtility.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/AnonymizableDataType.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/ClassName.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/DataType.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/DefaultAnonymizableDataType.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/DefaultDataType.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/FileName.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/JobName.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/JobProperties.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/NodeName.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/QueueName.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/UserName.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/util/DefaultJobPropertiesParser.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/util/JobPropertyParser.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/datatypes/util/MapReduceJobPropertiesParser.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/serializers/BlockingSerializer.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/serializers/DefaultAnonymizingRumenSerializer.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/serializers/DefaultRumenSerializer.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/serializers/ObjectStringSerializer.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/state/State.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/state/StateDeserializer.java create mode 100644 hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/state/StatePool.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 95a98ff344..6d198a745e 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -6,6 +6,7 @@ Trunk (unreleased changes) MAPREDUCE-3545. Remove Avro RPC. (suresh) NEW FEATURES + MAPREDUCE-778. Rumen Anonymizer. (Amar Kamat and Chris Douglas via amarrk) MAPREDUCE-2669. Add new examples for Mean, Median, and Standard Deviation. 
(Plamen Jeliazkov via shv) diff --git a/hadoop-mapreduce-project/ivy.xml b/hadoop-mapreduce-project/ivy.xml index e9b38d077e..e04da7019b 100644 --- a/hadoop-mapreduce-project/ivy.xml +++ b/hadoop-mapreduce-project/ivy.xml @@ -139,6 +139,13 @@ + + + + + diff --git a/hadoop-mapreduce-project/ivy/libraries.properties b/hadoop-mapreduce-project/ivy/libraries.properties index 360c5a9967..93a10282fc 100644 --- a/hadoop-mapreduce-project/ivy/libraries.properties +++ b/hadoop-mapreduce-project/ivy/libraries.properties @@ -81,5 +81,6 @@ wagon-http.version=1.0-beta-2 xmlenc.version=0.52 xerces.version=1.4.4 +jackson.version=1.8.2 yarn.version=0.24.0-SNAPSHOT hadoop-mapreduce.version=0.24.0-SNAPSHOT diff --git a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java index 9b6ed69f57..77ec697872 100644 --- a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java +++ b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/GridmixJob.java @@ -26,8 +26,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Delayed; import java.util.concurrent.TimeUnit; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import java.security.PrivilegedExceptionAction; import org.apache.hadoop.conf.Configuration; @@ -49,6 +47,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.tools.rumen.JobStory; +import static org.apache.hadoop.tools.rumen.datatypes.util.MapReduceJobPropertiesParser.extractMaxHeapOpts; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -92,8 +91,6 @@ protected Formatter initialValue() { // configuration key to enable/disable task jvm options static final String GRIDMIX_TASK_JVM_OPTIONS_ENABLE = "gridmix.task.jvm-options.enable"; - private static final Pattern maxHeapPattern = - Pattern.compile("-Xmx[0-9]+[kKmMgGtT]?+"); private static void setJobQueue(Job job, String queue) { if (queue != null) { @@ -225,18 +222,6 @@ private static void configureTaskJVMMaxHeapOptions(Configuration srcConf, } } } - - private static void extractMaxHeapOpts(String javaOptions, - List maxOpts, List others) { - for (String opt : javaOptions.split(" ")) { - Matcher matcher = maxHeapPattern.matcher(opt); - if (matcher.find()) { - maxOpts.add(opt); - } else { - others.add(opt); - } - } - } // Scales the desired job-level configuration parameter. This API makes sure // that the ratio of the job level configuration parameter to the cluster diff --git a/hadoop-mapreduce-project/src/docs/src/documentation/content/xdocs/rumen.xml b/hadoop-mapreduce-project/src/docs/src/documentation/content/xdocs/rumen.xml index 75b97ac5e8..dbe72c56ca 100644 --- a/hadoop-mapreduce-project/src/docs/src/documentation/content/xdocs/rumen.xml +++ b/hadoop-mapreduce-project/src/docs/src/documentation/content/xdocs/rumen.xml @@ -73,6 +73,11 @@ computed for the total number of successful tasks for every attempt. +
  • Anonymized traces enable sharing of production traces of large-scale Hadoop deployments. Sharing of traces will foster collaboration within the Hadoop community. They can also be used to supplement interesting research findings.
  • @@ -102,6 +107,11 @@ Increasing the trace runtime might involve adding some dummy jobs to the resulting trace and scaling up the runtime of individual jobs. +
  • Anonymizer: A utility to anonymize Hadoop job and cluster topology traces by masking certain sensitive fields but retaining important workload characteristics.
  • @@ -128,10 +138,11 @@ output-duration, concentration etc. -

    Rumen provides 2 basic commands

    +

    Rumen provides 3 basic commands

    • TraceBuilder
    • Folder
    • Anonymizer

    Firstly, we need to generate the Gold Trace. Hence the first @@ -139,8 +150,9 @@ The output of the TraceBuilder is a job-trace file (and an optional cluster-topology file). In case we want to scale the output, we can use the Folder utility to fold the current trace to the - desired length. The remaining part of this section explains these - utilities in detail. + desired length. For anonymizing the trace, use the + Anonymizer utility. The remaining part of this section + explains these utilities in detail.
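    For orientation, the end-to-end flow typically looks like the sketch below. The paths, the job-history input location, and the folding duration are illustrative placeholders rather than part of the original examples; see the TraceBuilder and Folder sections of this document for the exact options.

    java org.apache.hadoop.tools.rumen.TraceBuilder file:///home/user/job-trace.json file:///home/user/cluster-topology.json <jobhistory-input>
    java org.apache.hadoop.tools.rumen.Folder -output-duration 1h file:///home/user/job-trace.json file:///home/user/folded-trace.json
    java org.apache.hadoop.tools.rumen.Anonymizer -trace file:///home/user/folded-trace.json file:///home/user/job-trace-anonymized.json -topology file:///home/user/cluster-topology.json file:///home/user/cluster-topology-anonymized.json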

    Examples in this section assumes that certain libraries are present @@ -426,8 +438,156 @@

    +

    +

    +

    +

    +

    + + + + +
    Anonymizer

    Command:

    java org.apache.hadoop.tools.rumen.Anonymizer [options] [-trace <jobtrace-input> <jobtrace-output>] [-topology <topology-input> <topology-output>]

    This command invokes the Anonymizer utility of Rumen. It anonymizes sensitive information from the <jobtrace-input> file and outputs the anonymized content into the <jobtrace-output> file. It also anonymizes the cluster layout (topology) from the <topology-input> file and outputs it in the <topology-output> file. <jobtrace-input> represents the job trace file obtained using TraceBuilder or Folder. <topology-input> represents the cluster topology file obtained using TraceBuilder.


    Options:

    Parameter    Description                    Notes
    -trace       Anonymizes job traces.         Anonymizes sensitive fields like user-name, job-name,
                                                queue-name, host-names, job configuration parameters etc.
    -topology    Anonymizes cluster topology.   Anonymizes rack-names and host-names.
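    Since the command syntax above marks both options as optional, a job trace can also be anonymized without a topology file (and vice versa). An illustrative invocation with placeholder paths:

    java org.apache.hadoop.tools.rumen.Anonymizer -trace file:///home/user/job-trace.json file:///home/user/job-trace-anonymized.json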
    Anonymizer Configuration Parameters

    The Rumen anonymizer can be configured using the following configuration parameters:

    rumen.data-types.classname.preserve
        A comma-separated list of prefixes that the Anonymizer will not anonymize while
        processing classnames. If rumen.data-types.classname.preserve is set to
        'org.apache,com.hadoop.' then classnames starting with 'org.apache' or
        'com.hadoop.' will not be anonymized.

    rumen.datatypes.jobproperties.parsers
        A comma-separated list of job properties parsers. These parsers decide how the job
        configuration parameters (i.e. <key,value> pairs) should be processed. Default is
        MapReduceJobPropertiesParser. The default parser will only parse framework-level
        MapReduce-specific job configuration properties. Users can add custom parsers by
        implementing the JobPropertyParser interface. Rumen also provides an all-pass
        (i.e. no filter) parser called DefaultJobPropertiesParser.

    rumen.anonymization.states.dir
        Set this to a location (on LocalFileSystem or HDFS) for enabling state persistence
        and/or reload. This parameter is not set by default. Reloading and persistence of
        states depend on the state directory. Note that the state directory will contain
        the latest as well as previous states.

    rumen.anonymization.states.persist
        Set this to 'true' to persist the current state. Default value is 'false'. Note
        that the states will be persisted to the state manager's state directory specified
        using the rumen.anonymization.states.dir parameter.

    rumen.anonymization.states.reload
        Set this to 'true' to enable reuse of previously persisted state. The default value
        is 'false'. The previously persisted state will be reloaded from the state manager's
        state directory specified using the rumen.anonymization.states.dir parameter. Note
        that the Anonymizer will bail out if it fails to find any previously persisted state
        in the state directory or if the state directory is not set. If the user wishes to
        retain/reuse the states across multiple invocations of the Anonymizer, then the very
        first invocation of the Anonymizer should have rumen.anonymization.states.reload set
        to 'false' and rumen.anonymization.states.persist set to 'true'. Subsequent
        invocations of the Anonymizer can then have rumen.anonymization.states.reload set
        to 'true'.
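    As a sketch of how the state-related parameters can be supplied, the two invocations below persist the anonymization state on the first run and reload it on a later run, following the rumen.anonymization.states.reload guidance above. This assumes the Anonymizer accepts the standard Hadoop generic options (-D key=value), as Tool-based Rumen utilities typically do; otherwise the same keys can be set in a configuration file on the classpath. The state directory and trace paths are placeholders.

    java org.apache.hadoop.tools.rumen.Anonymizer -Drumen.anonymization.states.dir=hdfs:///user/alice/rumen-state -Drumen.anonymization.states.persist=true -trace file:///home/user/job-trace.json file:///home/user/job-trace-anonymized.json

    java org.apache.hadoop.tools.rumen.Anonymizer -Drumen.anonymization.states.dir=hdfs:///user/alice/rumen-state -Drumen.anonymization.states.reload=true -Drumen.anonymization.states.persist=true -trace file:///home/user/job-trace-week2.json file:///home/user/job-trace-week2-anonymized.json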
    Example

    java org.apache.hadoop.tools.rumen.Anonymizer -trace file:///home/user/job-trace.json file:///home/user/job-trace-anonymized.json -topology file:///home/user/cluster-topology.json file:///home/user/cluster-topology-anonymized.json


    This will anonymize the job details from file:///home/user/job-trace.json and output it to file:///home/user/job-trace-anonymized.json. It will also anonymize the cluster topology layout from file:///home/user/cluster-topology.json and output it to file:///home/user/cluster-topology-anonymized.json. Note that the Anonymizer also supports input and output files on HDFS.
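    For instance, an equivalent run over files stored in HDFS could look like the following (an illustrative command with placeholder HDFS paths):

    java org.apache.hadoop.tools.rumen.Anonymizer -trace hdfs:///user/alice/job-trace.json hdfs:///user/alice/job-trace-anonymized.json -topology hdfs:///user/alice/cluster-topology.json hdfs:///user/alice/cluster-topology-anonymized.json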

    +
    -