MAPREDUCE-3166. [Rumen] Make Rumen use job history api instead of relying on current history file name format.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1182330 13f79535-47bb-0310-9956-ffa450edef68
2011-10-12 12:19:02 +00:00 · 2011-10-12 12:19:02 +00:00 · f143895bc2
commit f143895bc2
parent efb3cd64a2
4 changed files with 43 additions and 6 deletions
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@ -24,6 +24,9 @@ Trunk (unreleased changes)
  BUG FIXES
    MAPREDUCE-3166. [Rumen] Make Rumen use job history api instead of relying
    on current history file name format. (Ravi Gummadi)
    MAPREDUCE-3157. [Rumen] Fix TraceBuilder to handle 0.20 history file
    names also. (Ravi Gummadi)
--- a/hadoop-mapreduce-project/ivy.xml
+++ b/hadoop-mapreduce-project/ivy.xml
@ -87,6 +87,8 @@
               rev="${yarn.version}" conf="compile->default"/>
   <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core" 
               rev="${yarn.version}" conf="compile->default"/>
   <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-common" 
               rev="${yarn.version}" conf="compile->default"/>
   <dependency org="org.apache.hadoop" name="hadoop-yarn-common"
               rev="${yarn.version}" conf="compile->default"/>
   <dependency org="log4j" name="log4j" rev="${log4j.version}" 
--- a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
+++ b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
@ -310,7 +310,7 @@ private void validateJHConfFileNameParsing(Path jhConfFileName,
  public void testJobHistoryFilenameParsing() throws IOException {
    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);
-    String user = "testUser";
+
    org.apache.hadoop.mapred.JobID jid = 
      new org.apache.hadoop.mapred.JobID("12345", 1);
    final Path rootInputDir =
@ -318,7 +318,8 @@ public void testJobHistoryFilenameParsing() throws IOException {
            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
    // Check if current jobhistory filenames are detected properly
-    Path jhFilename = JobHistory.getJobHistoryFile(rootInputDir, jid, user);
+    Path jhFilename = org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils
        .getStagingJobHistoryFile(rootInputDir, jid.toString(), 1);
    validateHistoryFileNameParsing(jhFilename, jid);
    // Check if Pre21 V1 jobhistory file names are detected properly
--- a/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java
+++ b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java
@ -17,6 +17,7 @@
 */
 package org.apache.hadoop.tools.rumen;
 import java.io.IOException;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@ -56,6 +57,30 @@ static String extractJobID(String fileName) {
    return jobId;
  }
  /**
   * Tries to obtain the job id from a job history file name, treating the
   * name as one written by the current hadoop version.
   * @param fileName job history file name to be examined
   * @return string form of the job id when the name is accepted as a current
   * version history file name and a job id could be obtained from it.
   * Returns null otherwise.
   */
  private static String extractJobIDFromCurrentHistoryFile(String fileName) {
    // Names that are not valid current-version history file names are left
    // for the caller to handle.
    if (!org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils
            .isValidJobHistoryFileName(fileName)) {
      return null;
    }
    try {
      JobID parsedId =
          org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils
              .getJobIDFromHistoryFilePath(fileName);
      return (parsedId == null) ? null : parsedId.toString();
    } catch (IOException ignored) {
      // Deliberately swallowed: a failure here is reported as "no job id"
      // so that the file name can still be tried as an older hadoop
      // version's history file.
      return null;
    }
  }
  /**
   * Extracts jobID string from the given job history file name.
   * @param fileName name of the job history file
@ -67,17 +92,23 @@ private static String extractJobIDFromHistoryFileName(String fileName) {
    // (1) old pre21 job history file name format
    // (2) new pre21 job history file name format
    // (3) current job history file name format i.e. 0.22
    // Try to get the jobID assuming that the history file is from the current
    // hadoop version
    String jobID = extractJobIDFromCurrentHistoryFile(fileName);
    if (jobID != null) {
      return jobID;//history file is of current hadoop version
    }
    // History file could be of older hadoop versions
    String pre21JobID = applyParser(fileName,
        Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V1);
    if (pre21JobID == null) {
      pre21JobID = applyParser(fileName,
          Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V2);
    }
    if (pre21JobID != null) {
    return pre21JobID;
  }
    return applyParser(fileName, JobHistory.JOBHISTORY_FILENAME_REGEX);
  }
  /**
   * Extracts jobID string from the given job conf xml file name.