diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 0c64a5b6ad..93b5d63e76 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -131,6 +131,9 @@ Trunk (Unreleased)
     MAPREDUCE-5078. TestMRAppMaster fails on Windows due to mismatched path
     separators. (Chris Nauroth via sseth)
 
+    MAPREDUCE-4885. Streaming tests have multiple failures on Windows. (Chris
+    Nauroth via bikas)
+
   BREAKDOWN OF HADOOP-8562 SUBTASKS
 
     MAPREDUCE-4739. Some MapReduce tests fail to find winutils.
diff --git a/hadoop-tools/hadoop-streaming/pom.xml b/hadoop-tools/hadoop-streaming/pom.xml
index 9924969316..7265c0468d 100644
--- a/hadoop-tools/hadoop-streaming/pom.xml
+++ b/hadoop-tools/hadoop-streaming/pom.xml
@@ -127,6 +127,20 @@
               </target>
             </configuration>
           </execution>
+          <execution>
+            <id>copy-test-bin</id>
+            <phase>process-test-resources</phase>
+            <goals>
+              <goal>run</goal>
+            </goals>
+            <configuration>
+              <target>
+                <copy todir="${project.build.directory}/bin">
+                  <fileset dir="${basedir}/src/test/bin"/>
+                </copy>
+              </target>
+            </configuration>
+          </execution>
         </executions>
       </plugin>
     </plugins>
diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java
index 2acc5563bd..d18a7654e0 100644
--- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java
+++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java
@@ -294,8 +294,7 @@ void parseArgv() {
       for (String file : values) {
         packageFiles_.add(file);
         try {
-          URI pathURI = new URI(file);
-          Path path = new Path(pathURI);
+          Path path = new Path(file);
           FileSystem localFs = FileSystem.getLocal(config_);
           String finalPath = path.makeQualified(localFs).toString();
           if(fileList.length() > 0) {
diff --git a/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd b/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd
new file mode 100644
index 0000000000..4b38e3e3b4
--- /dev/null
+++ b/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd
@@ -0,0 +1,18 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements.  See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership.  The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License.  You may obtain a copy of the License at
+@rem
+@rem     http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+
+@for /F "usebackq tokens=* delims=" %%A in (`findstr .`) do @echo %%A
+@rem lines have been copied from stdin to stdout
diff --git a/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd b/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd
new file mode 100644
index 0000000000..f398a8d65c
--- /dev/null
+++ b/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd
@@ -0,0 +1,18 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements.  See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership.  The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License.  You may obtain a copy of the License at
+@rem
+@rem     http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+
+@for /F "usebackq tokens=* delims=" %%A in (`findstr .`) do @type %%A
+@rem files named on stdin have been copied to stdout
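Why the StreamJob.java change above works: a raw Windows path is not a valid URI, so routing -file arguments through new URI(file) breaks on Windows, while Path accepts OS-native paths directly. A minimal sketch of the difference, using an illustrative path that is not taken from the patch:

    import java.net.URI;
    import java.net.URISyntaxException;
    import org.apache.hadoop.fs.Path;

    public class PathVsUri {
      public static void main(String[] args) throws URISyntaxException {
        try {
          new URI("C:\\temp\\input.txt");   // '\' is an illegal URI character
        } catch (URISyntaxException e) {
          System.out.println("URI rejects backslashes: " + e.getReason());
        }
        // With forward slashes the URI parses, but the drive letter becomes
        // the URI scheme, which is how new Path(new URI(file)) got mangled.
        System.out.println(new URI("C:/temp/input.txt").getScheme());  // prints C
        // Path understands Windows drive specifiers, so this stays a local path.
        System.out.println(new Path("C:/temp/input.txt"));
      }
    }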
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java
index ac577e4c7e..e864e9d855 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.streaming;
 
 import java.io.DataOutputStream;
+import java.io.File;
 import java.io.IOException;
 import java.util.Map;
 
@@ -27,6 +28,7 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.util.Shell;
 
 import org.junit.After;
 import org.junit.Before;
@@ -45,7 +47,8 @@ public class TestFileArgs
 
   private static final String EXPECTED_OUTPUT =
     "job.jar\t\nsidefile\t\n";
-  private static final String LS_PATH = "/bin/ls";
+  private static final String LS_PATH = Shell.WINDOWS ? "cmd /c dir /B" :
+    "/bin/ls";
 
   public TestFileArgs() throws IOException
   {
@@ -58,6 +61,7 @@ public TestFileArgs() throws IOException
     map = LS_PATH;
 
     FileSystem.setDefaultUri(conf, "hdfs://" + namenode);
+    setTestDir(new File("/tmp/TestFileArgs"));
   }
 
   @Before
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java
index c5136e6dc7..47b70ef6fb 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java
@@ -70,8 +70,8 @@ public TestMultipleArchiveFiles() throws Exception {
 
     namenode = fileSys.getUri().getAuthority();
     mr = new MiniMRCluster(1, namenode, 1);
-    map = "xargs cat";
-    reduce = "cat";
+    map = XARGS_CAT;
+    reduce = CAT;
   }
 
   @Override
@@ -84,7 +84,8 @@ protected void createInput() throws IOException {
     fileSys.delete(new Path(INPUT_DIR), true);
 
     DataOutputStream dos = fileSys.create(new Path(INPUT_FILE));
-    String inputFileString = "symlink1/cacheArchive1\nsymlink2/cacheArchive2";
+    String inputFileString = "symlink1" + File.separator +
+      "cacheArchive1\nsymlink2" + File.separator + "cacheArchive2";
     dos.write(inputFileString.getBytes("UTF-8"));
     dos.close();
 
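The File.separator change in TestMultipleArchiveFiles above matters because the generated names are consumed by the platform's xargs-cat equivalent: on Windows, xargs_cat.cmd hands each name to the cmd built-in type, which (like most cmd built-ins, plausibly) reads a '/'-prefixed segment as a switch and expects backslash separators. A small sketch of what the input file ends up containing; the output comments are illustrative:

    import java.io.File;

    public class SeparatorDemo {
      public static void main(String[] args) {
        // Build the name with the platform separator, as the test now does.
        String name = "symlink1" + File.separator + "cacheArchive1";
        System.out.println(name);
        // Unix:    symlink1/cacheArchive1   (consumed by: xargs cat)
        // Windows: symlink1\cacheArchive1   (consumed by: type, via xargs_cat.cmd)
      }
    }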
"/testing-streaming/cache.txt"; String CACHE_FILE_2 = "/testing-streaming/cache2.txt"; String input = "check to see if we can read this none reduce"; - String map = "xargs cat "; - String reduce = "cat"; + String map = TestStreaming.XARGS_CAT; + String reduce = TestStreaming.CAT; String mapString = "testlink"; String mapString2 = "testlink2"; String cacheString = "This is just the cache string"; diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java index da0bdae484..53009dbbab 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java @@ -33,7 +33,7 @@ public TestStreamXmlRecordReader() throws IOException { INPUT_FILE = new File("target/input.xml"); input = "\t\nroses.are.red\t\nviolets.are.blue\t\n" + "bunnies.are.pink\t\n\t\n"; - map = "cat"; + map = CAT; reduce = "NONE"; outputExpect = input; } diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java index 98ed1a299e..4f39120a16 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java @@ -33,7 +33,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.conf.Configuration; - +import org.apache.hadoop.util.Shell; /** * This class tests hadoopStreaming in MapReduce local mode. @@ -43,6 +43,22 @@ public class TestStreaming public static final String STREAMING_JAR = JarFinder.getJar(StreamJob.class); + /** + * cat command used for copying stdin to stdout as mapper or reducer function. + * On Windows, use a cmd script that approximates the functionality of cat. + */ + static final String CAT = Shell.WINDOWS ? + "cmd /c " + new File("target/bin/cat.cmd").getAbsolutePath() : "cat"; + + /** + * Command used for iterating through file names on stdin and copying each + * file's contents to stdout, used as mapper or reducer function. On Windows, + * use a cmd script that approximates the functionality of xargs cat. + */ + static final String XARGS_CAT = Shell.WINDOWS ? + "cmd /c " + new File("target/bin/xargs_cat.cmd").getAbsolutePath() : + "xargs cat"; + // "map" command: grep -E (red|green|blue) // reduce command: uniq protected File TEST_DIR; @@ -66,9 +82,22 @@ public TestStreaming() throws IOException UtilTest utilTest = new UtilTest(getClass().getName()); utilTest.checkUserDir(); utilTest.redirectIfAntJunit(); - TEST_DIR = new File("target/TestStreaming").getAbsoluteFile(); - OUTPUT_DIR = new File(TEST_DIR, "out"); - INPUT_FILE = new File(TEST_DIR, "input.txt"); + setTestDir(new File("target/TestStreaming").getAbsoluteFile()); + } + + /** + * Sets root of test working directory and resets any other paths that must be + * children of the test working directory. Typical usage is for subclasses + * that use HDFS to override the test directory to the form "/tmp/" + * so that on Windows, tests won't attempt to use paths containing a ':' from + * the drive specifier. The ':' character is considered invalid by HDFS. 
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java
index 444355f4fb..c21cb159f4 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java
@@ -76,7 +76,7 @@ protected String[] genArgs(boolean ignoreKey) {
     return new String[] {
       "-input", INPUT_FILE.getAbsolutePath(),
       "-output", OUTPUT_DIR.getAbsolutePath(),
-      "-mapper", "cat",
+      "-mapper", TestStreaming.CAT,
       "-jobconf", MRJobConfig.PRESERVE_FAILED_TASK_FILES + "=true",
       "-jobconf", "stream.non.zero.exit.is.failure=true",
       "-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp"),
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java
index f3158b2640..35eb752b23 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java
@@ -120,7 +120,7 @@ public void testJavaMapperWithReduceNone() throws Exception {
   @Test
   public void testJavaMapperAndCommandReducer() throws Exception {
     map = "org.apache.hadoop.mapred.lib.IdentityMapper";
-    reduce = "cat";
+    reduce = CAT;
     super.testCommandLine();
   }
 
@@ -128,7 +128,7 @@ public void testJavaMapperAndCommandReducer() throws Exception {
   @Test
   public void testJavaMapperAndCommandReducerAndZeroReduces() throws Exception {
     map = "org.apache.hadoop.mapred.lib.IdentityMapper";
-    reduce = "cat";
+    reduce = CAT;
     args.add("-numReduceTasks");
     args.add("0");
     super.testCommandLine();
@@ -137,7 +137,7 @@ public void testJavaMapperAndCommandReducerAndZeroReduces() throws Exception {
   // Check with Command Mapper, Java Reducer
   @Test
   public void testCommandMapperAndJavaReducer() throws Exception {
-    map = "cat";
+    map = CAT;
     reduce = MyReducer.class.getName();
     super.testCommandLine();
   }
@@ -145,7 +145,7 @@ public void testCommandMapperAndJavaReducer() throws Exception {
   // Check with Command Mapper, Java Reducer and -numReduceTasks 0
   @Test
   public void testCommandMapperAndJavaReducerAndZeroReduces() throws Exception {
-    map = "cat";
+    map = CAT;
     reduce = MyReducer.class.getName();
     args.add("-numReduceTasks");
     args.add("0");
@@ -155,7 +155,7 @@ public void testCommandMapperAndJavaReducerAndZeroReduces() throws Exception {
   // Check with Command Mapper, Reducer = "NONE"
   @Test
   public void testCommandMapperWithReduceNone() throws Exception {
-    map = "cat";
+    map = CAT;
     reduce = "NONE";
     super.testCommandLine();
   }
@@ -163,8 +163,8 @@ public void testCommandMapperWithReduceNone() throws Exception {
   // Check with Command Mapper, Command Reducer
   @Test
   public void testCommandMapperAndCommandReducer() throws Exception {
-    map = "cat";
-    reduce = "cat";
+    map = CAT;
+    reduce = CAT;
     super.testCommandLine();
   }
 
@@ -172,8 +172,8 @@ public void testCommandMapperAndCommandReducer() throws Exception {
   // Check with Command Mapper, Command Reducer and -numReduceTasks 0
   @Test
   public void testCommandMapperAndCommandReducerAndZeroReduces() throws Exception {
-    map = "cat";
-    reduce = "cat";
+    map = CAT;
+    reduce = CAT;
     args.add("-numReduceTasks");
     args.add("0");
     super.testCommandLine();
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java
index 823433c4c0..11c3b4e9b0 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java
@@ -83,7 +83,7 @@ String[] genArgs() {
    *  (b) hadoop.tasklog.totalLogFileSize
    * for the children of java tasks in streaming jobs.
    */
-  @Test (timeout = 30000)
+  @Test (timeout = 120000)
   public void testStreamingTaskLogWithHadoopCmd() {
     try {
       final int numSlaves = 1;
@@ -95,13 +95,14 @@ public void testStreamingTaskLogWithHadoopCmd() {
         fs.delete(testDir, true);
       }
       fs.mkdirs(testDir);
-      File scriptFile = createScript(
-        testDir.toString() + "/testTaskLog.sh");
+      File scriptFile = createScript(testDir.toString() +
+        (Shell.WINDOWS ? "/testTaskLog.cmd" : "/testTaskLog.sh"));
       conf.setBoolean(JTConfig.JT_PERSIST_JOBSTATUS, false);
       mr = new MiniMRCluster(numSlaves, fs.getUri().toString(), 1, null, null, conf);
 
       writeInputFile(fs, inputPath);
-      map = scriptFile.getAbsolutePath();
+      map = Shell.WINDOWS ? "cmd /c " + scriptFile.getAbsolutePath() :
+        scriptFile.getAbsolutePath();
 
       runStreamJobAndValidateEnv();
 
@@ -120,8 +121,12 @@ private File createScript(String script) throws IOException {
     File scriptFile = new File(script);
     UtilTest.recursiveDelete(scriptFile);
     FileOutputStream in = new FileOutputStream(scriptFile);
-    in.write(("cat > /dev/null 2>&1\n" +
-      "echo $HADOOP_ROOT_LOGGER $HADOOP_CLIENT_OPTS").getBytes());
+    if (Shell.WINDOWS) {
+      in.write("@echo %HADOOP_ROOT_LOGGER% %HADOOP_CLIENT_OPTS%".getBytes());
+    } else {
+      in.write(("cat > /dev/null 2>&1\n" +
+        "echo $HADOOP_ROOT_LOGGER $HADOOP_CLIENT_OPTS").getBytes());
+    }
     in.close();
 
     Shell.execCommand(Shell.getSetPermissionCommand("+x", false,
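Both branches of the rewritten createScript above emit a script that simply echoes the two environment variables the test asserts on; the Unix variant additionally drains stdin, presumably so the task's input writer never sees a broken pipe. A standalone sketch of the same idea, with illustrative file names:

    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import org.apache.hadoop.util.Shell;

    public class ProbeScript {
      /** Writes a platform-appropriate script echoing the env vars under test. */
      static File write(File dir) throws IOException {
        File script = new File(dir, Shell.WINDOWS ? "probe.cmd" : "probe.sh");
        FileOutputStream out = new FileOutputStream(script);
        try {
          if (Shell.WINDOWS) {
            // '@' suppresses cmd's command echo; %VAR% expands the variable.
            out.write("@echo %HADOOP_ROOT_LOGGER% %HADOOP_CLIENT_OPTS%"
                .getBytes("UTF-8"));
          } else {
            // Consume stdin first, then echo the variables.
            out.write(("cat > /dev/null 2>&1\n"
                + "echo $HADOOP_ROOT_LOGGER $HADOOP_CLIENT_OPTS").getBytes("UTF-8"));
          }
        } finally {
          out.close();
        }
        return script;
      }
    }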
"cmd /c " + scriptFile.getAbsolutePath() : + scriptFile.getAbsolutePath(); runStreamJobAndValidateEnv(); @@ -120,8 +121,12 @@ private File createScript(String script) throws IOException { File scriptFile = new File(script); UtilTest.recursiveDelete(scriptFile); FileOutputStream in = new FileOutputStream(scriptFile); - in.write(("cat > /dev/null 2>&1\n" + - "echo $HADOOP_ROOT_LOGGER $HADOOP_CLIENT_OPTS").getBytes()); + if (Shell.WINDOWS) { + in.write("@echo %HADOOP_ROOT_LOGGER% %HADOOP_CLIENT_OPTS%".getBytes()); + } else { + in.write(("cat > /dev/null 2>&1\n" + + "echo $HADOOP_ROOT_LOGGER $HADOOP_CLIENT_OPTS").getBytes()); + } in.close(); Shell.execCommand(Shell.getSetPermissionCommand("+x", false, diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java index dba676a32d..730429d6da 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java @@ -47,13 +47,13 @@ public class TestSymLink String OUTPUT_DIR = "/testing-streaming/out"; String CACHE_FILE = "/testing-streaming/cache.txt"; String input = "check to see if we can read this none reduce"; - String map = "xargs cat "; - String reduce = "cat"; + String map = TestStreaming.XARGS_CAT; + String reduce = TestStreaming.CAT; String mapString = "testlink\n"; String cacheString = "This is just the cache string"; StreamJob job; - @Test (timeout = 60000) + @Test (timeout = 120000) public void testSymLink() throws Exception { boolean mayExit = false;