diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 0cad461627..d5d9196f2b 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -24,6 +24,9 @@ Trunk (unreleased changes)
BUG FIXES
+ MAPREDUCE-3157. [Rumen] Fix TraceBuilder to handle 0.20 history file
+ names also. (Ravi Gummadi)
+
MAPREDUCE-2950. [Gridmix] TestUserResolve fails in trunk.
(Ravi Gummadi via amarrk)
diff --git a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
index 32bce5c3ce..4246b4e2b1 100644
--- a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
+++ b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
@@ -246,8 +246,57 @@ public void testHadoop20JHParser() throws Exception {
}
/**
- * Tests if {@link TraceBuilder} can correctly identify and parse jobhistory
- * filenames. The testcase checks if {@link TraceBuilder}
+ * Validate the parsing of given history file name. Also validate the history
+ * file name suffixed with old/stale file suffix.
+ * @param jhFileName job history file path
+ * @param jid JobID
+ */
+ private void validateHistoryFileNameParsing(Path jhFileName,
+ org.apache.hadoop.mapred.JobID jid) {
+ JobID extractedJID =
+ JobID.forName(JobHistoryUtils.extractJobID(jhFileName.getName()));
+ assertEquals("TraceBuilder failed to parse the current JH filename"
+ + jhFileName, jid, extractedJID);
+ // test jobhistory filename with old/stale file suffix
+ jhFileName = jhFileName.suffix(JobHistory.getOldFileSuffix("123"));
+ extractedJID =
+ JobID.forName(JobHistoryUtils.extractJobID(jhFileName.getName()));
+ assertEquals("TraceBuilder failed to parse the current JH filename"
+ + "(old-suffix):" + jhFileName,
+ jid, extractedJID);
+ }
+
+ /**
+ * Validate the parsing of given history conf file name. Also validate the
+ * history conf file name suffixed with old/stale file suffix.
+ * @param jhConfFileName job history conf file path
+ * @param jid JobID
+ */
+ private void validateJHConfFileNameParsing(Path jhConfFileName,
+ org.apache.hadoop.mapred.JobID jid) {
+ assertTrue("TraceBuilder failed to parse the JH conf filename:"
+ + jhConfFileName,
+ JobHistoryUtils.isJobConfXml(jhConfFileName.getName()));
+ JobID extractedJID =
+ JobID.forName(JobHistoryUtils.extractJobID(jhConfFileName.getName()));
+ assertEquals("TraceBuilder failed to parse the current JH conf filename:"
+ + jhConfFileName, jid, extractedJID);
+ // Test jobhistory conf filename with old/stale file suffix
+ jhConfFileName = jhConfFileName.suffix(JobHistory.getOldFileSuffix("123"));
+ assertTrue("TraceBuilder failed to parse the current JH conf filename"
+ + " (old suffix):" + jhConfFileName,
+ JobHistoryUtils.isJobConfXml(jhConfFileName.getName()));
+ extractedJID =
+ JobID.forName(JobHistoryUtils.extractJobID(jhConfFileName.getName()));
+ assertEquals("TraceBuilder failed to parse the JH conf filename"
+ + "(old-suffix):" + jhConfFileName,
+ jid, extractedJID);
+ }
+
+ /**
+ * Tests if {@link TraceBuilder} can correctly identify and parse different
+ * versions of jobhistory filenames. The testcase checks if
+ * {@link TraceBuilder}
* - correctly identifies a jobhistory filename without suffix
* - correctly parses a jobhistory filename without suffix to extract out
* the jobid
@@ -261,36 +310,36 @@ public void testHadoop20JHParser() throws Exception {
public void testJobHistoryFilenameParsing() throws IOException {
final Configuration conf = new Configuration();
final FileSystem lfs = FileSystem.getLocal(conf);
- String user = "test";
+ String user = "testUser";
org.apache.hadoop.mapred.JobID jid =
new org.apache.hadoop.mapred.JobID("12345", 1);
final Path rootInputDir =
new Path(System.getProperty("test.tools.input.dir", ""))
.makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
- // Check if jobhistory filename are detected properly
+ // Check if current jobhistory filenames are detected properly
Path jhFilename = JobHistory.getJobHistoryFile(rootInputDir, jid, user);
- JobID extractedJID =
- JobID.forName(TraceBuilder.extractJobID(jhFilename.getName()));
- assertEquals("TraceBuilder failed to parse the current JH filename",
- jid, extractedJID);
- // test jobhistory filename with old/stale file suffix
- jhFilename = jhFilename.suffix(JobHistory.getOldFileSuffix("123"));
- extractedJID =
- JobID.forName(TraceBuilder.extractJobID(jhFilename.getName()));
- assertEquals("TraceBuilder failed to parse the current JH filename"
- + "(old-suffix)",
- jid, extractedJID);
-
- // Check if the conf filename in jobhistory are detected properly
+ validateHistoryFileNameParsing(jhFilename, jid);
+
+ // Check if Pre21 V1 jobhistory file names are detected properly
+ jhFilename = new Path("jt-identifier_" + jid + "_user-name_job-name");
+ validateHistoryFileNameParsing(jhFilename, jid);
+
+ // Check if Pre21 V2 jobhistory file names are detected properly
+ jhFilename = new Path(jid + "_user-name_job-name");
+ validateHistoryFileNameParsing(jhFilename, jid);
+
+ // Check if the current jobhistory conf filenames are detected properly
Path jhConfFilename = JobHistory.getConfFile(rootInputDir, jid);
- assertTrue("TraceBuilder failed to parse the current JH conf filename",
- TraceBuilder.isJobConfXml(jhConfFilename.getName(), null));
- // test jobhistory conf filename with old/stale file suffix
- jhConfFilename = jhConfFilename.suffix(JobHistory.getOldFileSuffix("123"));
- assertTrue("TraceBuilder failed to parse the current JH conf filename"
- + " (old suffix)",
- TraceBuilder.isJobConfXml(jhConfFilename.getName(), null));
+ validateJHConfFileNameParsing(jhConfFilename, jid);
+
+ // Check if Pre21 V1 jobhistory conf file names are detected properly
+ jhConfFilename = new Path("jt-identifier_" + jid + "_conf.xml");
+ validateJHConfFileNameParsing(jhConfFilename, jid);
+
+ // Check if Pre21 V2 jobhistory conf file names are detected properly
+ jhConfFilename = new Path(jid + "_conf.xml");
+ validateJHConfFileNameParsing(jhConfFilename, jid);
}
/**
diff --git a/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java
new file mode 100644
index 0000000000..a6e8919ed9
--- /dev/null
+++ b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.tools.rumen;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.jobhistory.JobHistory;
+
+/**
+ * Job History related utils for handling multiple formats of history logs of
+ * different hadoop versions like Pre21 history logs, current history logs.
+ */
+public class JobHistoryUtils {
+
+ private static String applyParser(String fileName, Pattern pattern) {
+ Matcher matcher = pattern.matcher(fileName);
+
+ if (!matcher.matches()) {
+ return null;
+ }
+
+ return matcher.group(1);
+ }
+
+ /**
+ * Extracts jobID string from the given job history log file name or
+ * job history configuration file name.
+ * @param fileName name of job history file or job history configuration file
+ * @return a valid jobID String, parsed out of the file name. Otherwise,
+ * [especially for .crc files] returns null.
+ */
+ static String extractJobID(String fileName) {
+ // Get jobID if fileName is a config file name.
+ String jobId = extractJobIDFromConfFileName(fileName);
+ if (jobId == null) {
+ // Get JobID if fileName is a job history file name
+ jobId = extractJobIDFromHistoryFileName(fileName);
+ }
+ return jobId;
+ }
+
+ /**
+ * Extracts jobID string from the given job history file name.
+ * @param fileName name of the job history file
+ * @return JobID if the given <code>fileName</code> is a valid job history
+ * file name, <code>null</code> otherwise.
+ */
+ private static String extractJobIDFromHistoryFileName(String fileName) {
+ // History file name could be in one of the following formats
+ // (1) old pre21 job history file name format
+ // (2) new pre21 job history file name format
+ // (3) current job history file name format i.e. 0.22
+ String pre21JobID = applyParser(fileName,
+ Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V1);
+ if (pre21JobID == null) {
+ pre21JobID = applyParser(fileName,
+ Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V2);
+ }
+ if (pre21JobID != null) {
+ return pre21JobID;
+ }
+ return applyParser(fileName, JobHistory.JOBHISTORY_FILENAME_REGEX);
+ }
+
+ /**
+ * Extracts jobID string from the given job conf xml file name.
+ * @param fileName name of the job conf xml file
+ * @return job id if the given <code>fileName</code> is a valid job conf xml
+ * file name, <code>null</code> otherwise.
+ */
+ private static String extractJobIDFromConfFileName(String fileName) {
+ // History conf file name could be in one of the following formats
+ // (1) old pre21 job history conf file name format
+ // (2) new pre21 job history conf file name format
+ // (3) current job history conf file name format i.e. 0.22
+ String pre21JobID = applyParser(fileName,
+ Pre21JobHistoryConstants.CONF_FILENAME_REGEX_V1);
+ if (pre21JobID == null) {
+ pre21JobID = applyParser(fileName,
+ Pre21JobHistoryConstants.CONF_FILENAME_REGEX_V2);
+ }
+ if (pre21JobID != null) {
+ return pre21JobID;
+ }
+ return applyParser(fileName, JobHistory.CONF_FILENAME_REGEX);
+ }
+
+ /**
+ * Checks if the given <code>fileName</code> is a valid job conf xml file name
+ * @param fileName name of the file to be validated
+ * @return <code>true</code> if the given <code>fileName</code> is a valid
+ * job conf xml file name.
+ */
+ static boolean isJobConfXml(String fileName) {
+ String jobId = extractJobIDFromConfFileName(fileName);
+ return jobId != null;
+ }
+}
diff --git a/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java
index 6a972219f8..184db8ff04 100644
--- a/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java
+++ b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java
@@ -20,10 +20,10 @@
import java.util.regex.Pattern;
import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.jobhistory.JobHistory;
/**
- *
- *
+ * Job History related constants for Hadoop releases prior to 0.21
*/
public class Pre21JobHistoryConstants {
@@ -51,18 +51,34 @@ public static enum Values {
}
/**
- * Pre21 regex for jobhistory filename
+ * Regex for Pre21 V1(old) jobhistory filename
* i.e jt-identifier_job-id_user-name_job-name
*/
- static final Pattern JOBHISTORY_FILENAME_REGEX =
+ static final Pattern JOBHISTORY_FILENAME_REGEX_V1 =
Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX + ")_.+");
+ /**
+ * Regex for Pre21 V2(new) jobhistory filename
+ * i.e job-id_user-name_job-name
+ */
+ static final Pattern JOBHISTORY_FILENAME_REGEX_V2 =
+ Pattern.compile("(" + JobID.JOBID_REGEX + ")_.+");
+
+ static final String OLD_FULL_SUFFIX_REGEX_STRING =
+ "(?:\\.[0-9]+" + Pattern.quote(JobHistory.OLD_SUFFIX) + ")";
/**
- * Pre21 regex for jobhistory conf filename
+ * Regex for Pre21 V1(old) jobhistory conf filename
* i.e jt-identifier_job-id_conf.xml
*/
- static final Pattern CONF_FILENAME_REGEX =
- Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX
- + ")_conf.xml(?:\\.[0-9a-zA-Z]+)?");
+ static final Pattern CONF_FILENAME_REGEX_V1 =
+ Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX + ")_conf.xml"
+ + OLD_FULL_SUFFIX_REGEX_STRING + "?");
+ /**
+ * Regex for Pre21 V2(new) jobhistory conf filename
+ * i.e job-id_conf.xml
+ */
+ static final Pattern CONF_FILENAME_REGEX_V2 =
+ Pattern.compile("(" + JobID.JOBID_REGEX + ")_conf.xml"
+ + OLD_FULL_SUFFIX_REGEX_STRING + "?");
}
diff --git a/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java
index 7330c712a8..c03030971c 100644
--- a/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java
+++ b/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java
@@ -198,42 +198,6 @@ public static void main(String[] args) {
}
}
- private static String applyParser(String fileName, Pattern pattern) {
- Matcher matcher = pattern.matcher(fileName);
-
- if (!matcher.matches()) {
- return null;
- }
-
- return matcher.group(1);
- }
-
- /**
- * @param fileName
- * @return the jobID String, parsed out of the file name. We return a valid
- * String for either a history log file or a config file. Otherwise,
- * [especially for .crc files] we return null.
- */
- static String extractJobID(String fileName) {
- String jobId = applyParser(fileName, JobHistory.JOBHISTORY_FILENAME_REGEX);
- if (jobId == null) {
- // check if its a pre21 jobhistory file
- jobId = applyParser(fileName,
- Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX);
- }
- return jobId;
- }
-
- static boolean isJobConfXml(String fileName, InputStream input) {
- String jobId = applyParser(fileName, JobHistory.CONF_FILENAME_REGEX);
- if (jobId == null) {
- // check if its a pre21 jobhistory conf file
- jobId = applyParser(fileName,
- Pre21JobHistoryConstants.CONF_FILENAME_REGEX);
- }
- return jobId != null;
- }
-
@SuppressWarnings("unchecked")
@Override
@@ -268,7 +232,7 @@ public int run(String[] args) throws Exception {
JobHistoryParser parser = null;
try {
- String jobID = extractJobID(filePair.first());
+ String jobID = JobHistoryUtils.extractJobID(filePair.first());
if (jobID == null) {
LOG.warn("File skipped: Invalid file name: "
+ filePair.first());
@@ -282,8 +246,9 @@ public int run(String[] args) throws Exception {
jobBuilder = new JobBuilder(jobID);
}
- if (isJobConfXml(filePair.first(), ris)) {
- processJobConf(JobConfigurationParser.parse(ris.rewind()), jobBuilder);
+ if (JobHistoryUtils.isJobConfXml(filePair.first())) {
+ processJobConf(JobConfigurationParser.parse(ris.rewind()),
+ jobBuilder);
} else {
parser = JobHistoryParserFactory.getParser(ris);
if (parser == null) {