diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 0c64a5b6ad..93b5d63e76 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -131,6 +131,9 @@ Trunk (Unreleased)
MAPREDUCE-5078. TestMRAppMaster fails on Windows due to mismatched path
separators. (Chris Nauroth via sseth)
+ MAPREDUCE-4885. Streaming tests have multiple failures on Windows. (Chris
+ Nauroth via bikas)
+
BREAKDOWN OF HADOOP-8562 SUBTASKS
MAPREDUCE-4739. Some MapReduce tests fail to find winutils.
diff --git a/hadoop-tools/hadoop-streaming/pom.xml b/hadoop-tools/hadoop-streaming/pom.xml
index 9924969316..7265c0468d 100644
--- a/hadoop-tools/hadoop-streaming/pom.xml
+++ b/hadoop-tools/hadoop-streaming/pom.xml
@@ -127,6 +127,20 @@
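+          <execution>
+            <id>copy-test-bin</id>
+            <phase>process-test-resources</phase>
+            <goals>
+              <goal>run</goal>
+            </goals>
+            <configuration>
+              <target>
+                <copy todir="${basedir}/target/bin">
+                  <fileset dir="${basedir}/src/test/bin"/>
+                </copy>
+              </target>
+            </configuration>
+          </execution>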
diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java
index 2acc5563bd..d18a7654e0 100644
--- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java
+++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java
@@ -294,8 +294,7 @@ void parseArgv() {
for (String file : values) {
packageFiles_.add(file);
try {
- URI pathURI = new URI(file);
- Path path = new Path(pathURI);
+ Path path = new Path(file);
FileSystem localFs = FileSystem.getLocal(config_);
String finalPath = path.makeQualified(localFs).toString();
if(fileList.length() > 0) {
diff --git a/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd b/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd
new file mode 100644
index 0000000000..4b38e3e3b4
--- /dev/null
+++ b/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd
@@ -0,0 +1,18 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+
+@for /F "usebackq tokens=* delims=" %%A in (`findstr .`) do @echo %%A
+@rem non-blank lines have been copied from stdin to stdout (findstr . drops empty lines)
diff --git a/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd b/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd
new file mode 100644
index 0000000000..f398a8d65c
--- /dev/null
+++ b/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd
@@ -0,0 +1,18 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+
+@for /F "usebackq tokens=* delims=" %%A in (`findstr .`) do @type %%A
+@rem files named on stdin have been copied to stdout
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java
index ac577e4c7e..e864e9d855 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.streaming;
import java.io.DataOutputStream;
+import java.io.File;
import java.io.IOException;
import java.util.Map;
@@ -27,6 +28,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.util.Shell;
import org.junit.After;
import org.junit.Before;
@@ -45,7 +47,8 @@ public class TestFileArgs extends TestStreaming
private static final String EXPECTED_OUTPUT =
"job.jar\t\nsidefile\t\n";
- private static final String LS_PATH = "/bin/ls";
+ private static final String LS_PATH = Shell.WINDOWS ? "cmd /c dir /B" :
+ "/bin/ls";
public TestFileArgs() throws IOException
{
@@ -58,6 +61,7 @@ public TestFileArgs() throws IOException
map = LS_PATH;
FileSystem.setDefaultUri(conf, "hdfs://" + namenode);
+ setTestDir(new File("/tmp/TestFileArgs"));
}
@Before
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java
index c5136e6dc7..47b70ef6fb 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java
@@ -70,8 +70,8 @@ public TestMultipleArchiveFiles() throws Exception {
namenode = fileSys.getUri().getAuthority();
mr = new MiniMRCluster(1, namenode, 1);
- map = "xargs cat";
- reduce = "cat";
+ map = XARGS_CAT;
+ reduce = CAT;
}
@Override
@@ -84,7 +84,8 @@ protected void createInput() throws IOException
{
fileSys.delete(new Path(INPUT_DIR), true);
DataOutputStream dos = fileSys.create(new Path(INPUT_FILE));
- String inputFileString = "symlink1/cacheArchive1\nsymlink2/cacheArchive2";
+ String inputFileString = "symlink1" + File.separator
+ + "cacheArchive1\nsymlink2" + File.separator + "cacheArchive2";
dos.write(inputFileString.getBytes("UTF-8"));
dos.close();
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java
index 357bfcfd0b..ae8f57d231 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java
@@ -49,8 +49,8 @@ public class TestMultipleCachefiles
String CACHE_FILE = "/testing-streaming/cache.txt";
String CACHE_FILE_2 = "/testing-streaming/cache2.txt";
String input = "check to see if we can read this none reduce";
- String map = "xargs cat ";
- String reduce = "cat";
+ String map = TestStreaming.XARGS_CAT;
+ String reduce = TestStreaming.CAT;
String mapString = "testlink";
String mapString2 = "testlink2";
String cacheString = "This is just the cache string";
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java
index da0bdae484..53009dbbab 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java
@@ -33,7 +33,7 @@ public TestStreamXmlRecordReader() throws IOException {
INPUT_FILE = new File("target/input.xml");
input = "\t\nroses.are.red\t\nviolets.are.blue\t\n" +
"bunnies.are.pink\t\n\t\n";
- map = "cat";
+ map = CAT;
reduce = "NONE";
outputExpect = input;
}
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java
index 98ed1a299e..4f39120a16 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java
@@ -33,7 +33,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.Configuration;
-
+import org.apache.hadoop.util.Shell;
/**
* This class tests hadoopStreaming in MapReduce local mode.
@@ -43,6 +43,22 @@ public class TestStreaming
public static final String STREAMING_JAR = JarFinder.getJar(StreamJob.class);
+ /**
+ * cat command used for copying stdin to stdout as mapper or reducer function.
+ * On Windows, use a cmd script that approximates the functionality of cat.
+ */
+ static final String CAT = Shell.WINDOWS ?
+ "cmd /c " + new File("target/bin/cat.cmd").getAbsolutePath() : "cat";
+
+ /**
+ * Command used for iterating through file names on stdin and copying each
+ * file's contents to stdout, used as mapper or reducer function. On Windows,
+ * use a cmd script that approximates the functionality of xargs cat.
+ */
+ static final String XARGS_CAT = Shell.WINDOWS ?
+ "cmd /c " + new File("target/bin/xargs_cat.cmd").getAbsolutePath() :
+ "xargs cat";
+
// "map" command: grep -E (red|green|blue)
// reduce command: uniq
protected File TEST_DIR;
@@ -66,9 +82,22 @@ public TestStreaming() throws IOException
UtilTest utilTest = new UtilTest(getClass().getName());
utilTest.checkUserDir();
utilTest.redirectIfAntJunit();
- TEST_DIR = new File("target/TestStreaming").getAbsoluteFile();
- OUTPUT_DIR = new File(TEST_DIR, "out");
- INPUT_FILE = new File(TEST_DIR, "input.txt");
+ setTestDir(new File("target/TestStreaming").getAbsoluteFile());
+ }
+
+ /**
+ * Sets root of test working directory and resets any other paths that must be
+ * children of the test working directory. Typical usage is for subclasses
+ * that use HDFS to override the test directory to {@code /tmp/<test name>}
+ * so that on Windows, tests won't attempt to use paths containing a ':' from
+ * the drive specifier. The ':' character is considered invalid by HDFS.
+ *
+ * @param testDir new root directory; OUTPUT_DIR and INPUT_FILE are rebased on it
+ */
+ protected void setTestDir(File testDir) {
+ TEST_DIR = testDir;
+ OUTPUT_DIR = new File(testDir, "out");
+ INPUT_FILE = new File(testDir, "input.txt");
}
@Before
@@ -89,19 +118,18 @@ protected String getInputData() {
protected void createInput() throws IOException
{
- DataOutputStream out = getFileSystem().create(
- new Path(INPUT_FILE.getAbsolutePath()));
+ DataOutputStream out = getFileSystem().create(new Path(
+ INPUT_FILE.getPath()));
out.write(getInputData().getBytes("UTF-8"));
out.close();
}
protected void setInputOutput() {
- inputFile = INPUT_FILE.getAbsolutePath();
- outDir = OUTPUT_DIR.getAbsolutePath();
+ inputFile = INPUT_FILE.getPath();
+ outDir = OUTPUT_DIR.getPath();
}
protected String[] genArgs() {
- setInputOutput();
args.add("-input");args.add(inputFile);
args.add("-output");args.add(outDir);
args.add("-mapper");args.add(map);
@@ -129,7 +157,7 @@ protected String getExpectedOutput() {
}
protected void checkOutput() throws IOException {
- Path outPath = new Path(OUTPUT_DIR.getAbsolutePath(), "part-00000");
+ Path outPath = new Path(OUTPUT_DIR.getPath(), "part-00000");
FileSystem fs = getFileSystem();
String output = StreamUtil.slurpHadoop(outPath, fs);
fs.delete(outPath, true);
@@ -155,6 +183,7 @@ protected void assertOutput(String expectedOutput, String output) throws IOExcep
* @throws IOException
*/
protected int runStreamJob() throws IOException {
+ setInputOutput();
createInput();
boolean mayExit = false;
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java
index 444355f4fb..c21cb159f4 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java
@@ -76,7 +76,7 @@ protected String[] genArgs(boolean ignoreKey) {
return new String[] {
"-input", INPUT_FILE.getAbsolutePath(),
"-output", OUTPUT_DIR.getAbsolutePath(),
- "-mapper", "cat",
+ "-mapper", TestStreaming.CAT,
"-jobconf", MRJobConfig.PRESERVE_FAILED_TASK_FILES + "=true",
"-jobconf", "stream.non.zero.exit.is.failure=true",
"-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp"),
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java
index f3158b2640..35eb752b23 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java
@@ -120,7 +120,7 @@ public void testJavaMapperWithReduceNone() throws Exception {
@Test
public void testJavaMapperAndCommandReducer() throws Exception {
map = "org.apache.hadoop.mapred.lib.IdentityMapper";
- reduce = "cat";
+ reduce = CAT;
super.testCommandLine();
}
@@ -128,7 +128,7 @@ public void testJavaMapperAndCommandReducer() throws Exception {
@Test
public void testJavaMapperAndCommandReducerAndZeroReduces() throws Exception {
map = "org.apache.hadoop.mapred.lib.IdentityMapper";
- reduce = "cat";
+ reduce = CAT;
args.add("-numReduceTasks");
args.add("0");
super.testCommandLine();
@@ -137,7 +137,7 @@ public void testJavaMapperAndCommandReducerAndZeroReduces() throws Exception {
// Check with Command Mapper, Java Reducer
@Test
public void testCommandMapperAndJavaReducer() throws Exception {
- map = "cat";
+ map = CAT;
reduce = MyReducer.class.getName();
super.testCommandLine();
}
@@ -145,7 +145,7 @@ public void testCommandMapperAndJavaReducer() throws Exception {
// Check with Command Mapper, Java Reducer and -numReduceTasks 0
@Test
public void testCommandMapperAndJavaReducerAndZeroReduces() throws Exception {
- map = "cat";
+ map = CAT;
reduce = MyReducer.class.getName();
args.add("-numReduceTasks");
args.add("0");
@@ -155,7 +155,7 @@ public void testCommandMapperAndJavaReducerAndZeroReduces() throws Exception {
// Check with Command Mapper, Reducer = "NONE"
@Test
public void testCommandMapperWithReduceNone() throws Exception {
- map = "cat";
+ map = CAT;
reduce = "NONE";
super.testCommandLine();
}
@@ -163,8 +163,8 @@ public void testCommandMapperWithReduceNone() throws Exception {
// Check with Command Mapper, Command Reducer
@Test
public void testCommandMapperAndCommandReducer() throws Exception {
- map = "cat";
- reduce = "cat";
+ map = CAT;
+ reduce = CAT;
super.testCommandLine();
}
@@ -172,8 +172,8 @@ public void testCommandMapperAndCommandReducer() throws Exception {
@Test
public void testCommandMapperAndCommandReducerAndZeroReduces()
throws Exception {
- map = "cat";
- reduce = "cat";
+ map = CAT;
+ reduce = CAT;
args.add("-numReduceTasks");
args.add("0");
super.testCommandLine();
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java
index 823433c4c0..11c3b4e9b0 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java
@@ -83,7 +83,7 @@ String[] genArgs() {
* (b) hadoop.tasklog.totalLogFileSize
* for the children of java tasks in streaming jobs.
*/
- @Test (timeout = 30000)
+ @Test (timeout = 120000)
public void testStreamingTaskLogWithHadoopCmd() {
try {
final int numSlaves = 1;
@@ -95,13 +95,14 @@ public void testStreamingTaskLogWithHadoopCmd() {
fs.delete(testDir, true);
}
fs.mkdirs(testDir);
- File scriptFile = createScript(
- testDir.toString() + "/testTaskLog.sh");
+ File scriptFile = createScript(testDir.toString() +
+ (Shell.WINDOWS ? "/testTaskLog.cmd" : "/testTaskLog.sh"));
conf.setBoolean(JTConfig.JT_PERSIST_JOBSTATUS, false);
mr = new MiniMRCluster(numSlaves, fs.getUri().toString(), 1, null, null, conf);
writeInputFile(fs, inputPath);
- map = scriptFile.getAbsolutePath();
+ map = Shell.WINDOWS ? "cmd /c " + scriptFile.getAbsolutePath() :
+ scriptFile.getAbsolutePath();
runStreamJobAndValidateEnv();
@@ -120,8 +121,12 @@ private File createScript(String script) throws IOException {
File scriptFile = new File(script);
UtilTest.recursiveDelete(scriptFile);
FileOutputStream in = new FileOutputStream(scriptFile);
- in.write(("cat > /dev/null 2>&1\n" +
- "echo $HADOOP_ROOT_LOGGER $HADOOP_CLIENT_OPTS").getBytes());
+ if (Shell.WINDOWS) {
+ in.write("@echo %HADOOP_ROOT_LOGGER% %HADOOP_CLIENT_OPTS%".getBytes());
+ } else {
+ in.write(("cat > /dev/null 2>&1\n" +
+ "echo $HADOOP_ROOT_LOGGER $HADOOP_CLIENT_OPTS").getBytes());
+ }
in.close();
Shell.execCommand(Shell.getSetPermissionCommand("+x", false,
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java
index dba676a32d..730429d6da 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java
@@ -47,13 +47,13 @@ public class TestSymLink
String OUTPUT_DIR = "/testing-streaming/out";
String CACHE_FILE = "/testing-streaming/cache.txt";
String input = "check to see if we can read this none reduce";
- String map = "xargs cat ";
- String reduce = "cat";
+ String map = TestStreaming.XARGS_CAT;
+ String reduce = TestStreaming.CAT;
String mapString = "testlink\n";
String cacheString = "This is just the cache string";
StreamJob job;
- @Test (timeout = 60000)
+ @Test (timeout = 120000)
public void testSymLink() throws Exception
{
boolean mayExit = false;