From 047b175aac0c8431d3d1f31665b9f8c4569e6c64 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Fri, 7 Oct 2011 20:44:37 +0000 Subject: [PATCH] MAPREDUCE-2802. Ensure JobHistory filenames have jobId. Contributed by Jonathan Eagles. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1180220 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../v2/jobhistory/FileNameIndexUtils.java | 53 +++---- .../v2/jobhistory/TestFileNameIndexUtils.java | 130 ++++++++++++++++++ 3 files changed, 150 insertions(+), 36 deletions(-) create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestFileNameIndexUtils.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index dfe921af15..d276331ab8 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -1549,6 +1549,9 @@ Release 0.23.0 - Unreleased MAPREDUCE-3033. Ensure Master interface pays attention to classic v/s yarn frameworks. (Hitesh Shah via acmurthy) + MAPREDUCE-2802. Ensure JobHistory filenames have jobId. (Jonathan Eagles + via acmurthy) + Release 0.22.0 - Unreleased INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/FileNameIndexUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/FileNameIndexUtils.java index 5eaf0e3ee7..f22d51c7c6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/FileNameIndexUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/FileNameIndexUtils.java @@ -29,11 +29,11 @@ public class FileNameIndexUtils { - static final String UNDERSCORE_ESCAPE = "%5F"; static final int JOB_NAME_TRIM_LENGTH = 50; - //This has to be underscore currently. Untill escape uses DELIMITER. - static final String DELIMITER = "_"; + // Sanitize job history file for predictable parsing + static final String DELIMITER = "-"; + static final String DELIMITER_ESCAPE = "%2D"; private static final int JOB_ID_INDEX = 0; private static final int SUBMIT_TIME_INDEX = 1; @@ -54,7 +54,7 @@ public class FileNameIndexUtils { public static String getDoneFileName(JobIndexInfo indexInfo) throws IOException { StringBuilder sb = new StringBuilder(); //JobId - sb.append(escapeUnderscores(TypeConverter.fromYarn(indexInfo.getJobId()).toString())); + sb.append(escapeDelimiters(TypeConverter.fromYarn(indexInfo.getJobId()).toString())); sb.append(DELIMITER); //StartTime @@ -62,11 +62,11 @@ public static String getDoneFileName(JobIndexInfo indexInfo) throws IOException sb.append(DELIMITER); //UserName - sb.append(escapeUnderscores(getUserName(indexInfo))); + sb.append(escapeDelimiters(getUserName(indexInfo))); sb.append(DELIMITER); //JobName - sb.append(escapeUnderscores(trimJobName(getJobName(indexInfo)))); + sb.append(escapeDelimiters(trimJobName(getJobName(indexInfo)))); sb.append(DELIMITER); //FinishTime @@ -136,13 +136,13 @@ public static JobIndexInfo getIndexInfo(String jhFileName) throws IOException { */ public static String encodeJobHistoryFileName(String logFileName) throws IOException { - String replacementUnderscoreEscape = null; + String replacementDelimiterEscape = null; - if (logFileName.contains(UNDERSCORE_ESCAPE)) { - replacementUnderscoreEscape = nonOccursString(logFileName); + // Temporarily protect the escape delimiters from encoding + if (logFileName.contains(DELIMITER_ESCAPE)) { + replacementDelimiterEscape = nonOccursString(logFileName); - logFileName = replaceStringInstances - (logFileName, UNDERSCORE_ESCAPE, replacementUnderscoreEscape); + logFileName = logFileName.replaceAll(DELIMITER_ESCAPE, replacementDelimiterEscape); } String encodedFileName = null; @@ -154,10 +154,10 @@ public static String encodeJobHistoryFileName(String logFileName) ioe.setStackTrace(uee.getStackTrace()); throw ioe; } - - if (replacementUnderscoreEscape != null) { - encodedFileName = replaceStringInstances - (encodedFileName, replacementUnderscoreEscape, UNDERSCORE_ESCAPE); + + // Restore protected escape delimiters after encoding + if (replacementDelimiterEscape != null) { + encodedFileName = encodedFileName.replaceAll(replacementDelimiterEscape, DELIMITER_ESCAPE); } return encodedFileName; @@ -214,29 +214,10 @@ private static String getNonEmptyString(String in) { return in; } - private static String escapeUnderscores(String escapee) { - return replaceStringInstances(escapee, "_", UNDERSCORE_ESCAPE); + private static String escapeDelimiters(String escapee) { + return escapee.replaceAll(DELIMITER, DELIMITER_ESCAPE); } - - // I tolerate this code because I expect a low number of - // occurrences in a relatively short string - private static String replaceStringInstances - (String logFileName, String old, String replacement) { - int index = logFileName.indexOf(old); - while (index > 0) { - logFileName = (logFileName.substring(0, index) - + replacement - + replaceStringInstances - (logFileName.substring(index + old.length()), - old, replacement)); - - index = logFileName.indexOf(old); - } - - return logFileName; - } - /** * Trims the job-name if required */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestFileNameIndexUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestFileNameIndexUtils.java new file mode 100644 index 0000000000..9de3dcdfaa --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestFileNameIndexUtils.java @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapreduce.v2.jobhistory; + +import java.io.IOException; + +import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.TypeConverter; +import org.apache.hadoop.mapreduce.v2.api.records.JobId; + +import org.junit.Assert; +import org.junit.Test; + +public class TestFileNameIndexUtils { + + private static final String JOB_HISTORY_FILE_FORMATTER = "%s" + + FileNameIndexUtils.DELIMITER + "%s" + + FileNameIndexUtils.DELIMITER + "%s" + + FileNameIndexUtils.DELIMITER + "%s" + + FileNameIndexUtils.DELIMITER + "%s" + + FileNameIndexUtils.DELIMITER + "%s" + + FileNameIndexUtils.DELIMITER + "%s" + + FileNameIndexUtils.DELIMITER + "%s" + + JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION; + + private static final String JOB_ID = "job_1317928501754_0001"; + private static final String SUBMIT_TIME = "1317928742025"; + private static final String USER_NAME = "username"; + private static final String USER_NAME_WITH_DELIMITER = "user" + + FileNameIndexUtils.DELIMITER + "name"; + private static final String USER_NAME_WITH_DELIMITER_ESCAPE = "user" + + FileNameIndexUtils.DELIMITER_ESCAPE + "name"; + private static final String JOB_NAME = "mapreduce"; + private static final String JOB_NAME_WITH_DELIMITER = "map" + + FileNameIndexUtils.DELIMITER + "reduce"; + private static final String JOB_NAME_WITH_DELIMITER_ESCAPE = "map" + + FileNameIndexUtils.DELIMITER_ESCAPE + "reduce"; + private static final String FINISH_TIME = "1317928754958"; + private static final String NUM_MAPS = "1"; + private static final String NUM_REDUCES = "1"; + private static final String JOB_STATUS = "SUCCEEDED"; + + @Test + public void testUserNamePercentEncoding() throws IOException{ + JobIndexInfo info = new JobIndexInfo(); + JobID oldJobId = JobID.forName(JOB_ID); + JobId jobId = TypeConverter.toYarn(oldJobId); + info.setJobId(jobId); + info.setSubmitTime(Long.parseLong(SUBMIT_TIME)); + info.setUser(USER_NAME_WITH_DELIMITER); + info.setJobName(JOB_NAME); + info.setFinishTime(Long.parseLong(FINISH_TIME)); + info.setNumMaps(Integer.parseInt(NUM_MAPS)); + info.setNumReduces(Integer.parseInt(NUM_REDUCES)); + info.setJobStatus(JOB_STATUS); + + String jobHistoryFile = FileNameIndexUtils.getDoneFileName(info); + Assert.assertTrue("User name not encoded correctly into job history file", + jobHistoryFile.contains(USER_NAME_WITH_DELIMITER_ESCAPE)); + } + + @Test + public void testUserNamePercentDecoding() throws IOException { + String jobHistoryFile = String.format(JOB_HISTORY_FILE_FORMATTER, + JOB_ID, + SUBMIT_TIME, + USER_NAME_WITH_DELIMITER_ESCAPE, + JOB_NAME, + FINISH_TIME, + NUM_MAPS, + NUM_REDUCES, + JOB_STATUS); + + JobIndexInfo info = FileNameIndexUtils.getIndexInfo(jobHistoryFile); + Assert.assertEquals("User name doesn't match", + USER_NAME_WITH_DELIMITER, info.getUser()); + } + + @Test + public void testJobNamePercentEncoding() throws IOException { + JobIndexInfo info = new JobIndexInfo(); + JobID oldJobId = JobID.forName(JOB_ID); + JobId jobId = TypeConverter.toYarn(oldJobId); + info.setJobId(jobId); + info.setSubmitTime(Long.parseLong(SUBMIT_TIME)); + info.setUser(USER_NAME); + info.setJobName(JOB_NAME_WITH_DELIMITER); + info.setFinishTime(Long.parseLong(FINISH_TIME)); + info.setNumMaps(Integer.parseInt(NUM_MAPS)); + info.setNumReduces(Integer.parseInt(NUM_REDUCES)); + info.setJobStatus(JOB_STATUS); + + String jobHistoryFile = FileNameIndexUtils.getDoneFileName(info); + Assert.assertTrue("Job name not encoded correctly into job history file", + jobHistoryFile.contains(JOB_NAME_WITH_DELIMITER_ESCAPE)); + } + + @Test + public void testJobNamePercentDecoding() throws IOException { + String jobHistoryFile = String.format(JOB_HISTORY_FILE_FORMATTER, + JOB_ID, + SUBMIT_TIME, + USER_NAME, + JOB_NAME_WITH_DELIMITER_ESCAPE, + FINISH_TIME, + NUM_MAPS, + NUM_REDUCES, + JOB_STATUS); + + JobIndexInfo info = FileNameIndexUtils.getIndexInfo(jobHistoryFile); + Assert.assertEquals("Job name doesn't match", + JOB_NAME_WITH_DELIMITER, info.getJobName()); + } +}