From bee6822441d1c2bedb2a135c8b56558aa5cac366 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Wed, 6 Nov 2013 18:07:39 +0000 Subject: [PATCH] MAPREDUCE-5451. MR uses LD_LIBRARY_PATH which doesn't mean anything in Windows. Contributed by Yingda Chen. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1539413 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../apache/hadoop/mapreduce/MRJobConfig.java | 6 +- .../src/main/resources/mapred-default.xml | 9 +- .../hadoop/mapred/TestMiniMRChildTask.java | 91 ++++++++++++++++++- 4 files changed, 102 insertions(+), 7 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 08650afb67..261a1de3e4 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -243,6 +243,9 @@ Release 2.2.1 - UNRELEASED MAPREDUCE-5604. TestMRAMWithNonNormalizedCapabilities fails on Windows due to exceeding max path length. (cnauroth) + MAPREDUCE-5451. MR uses LD_LIBRARY_PATH which doesn't mean anything in + Windows. 
(Yingda Chen via cnauroth) + Release 2.2.0 - 2013-10-13 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 09044a374a..76097de896 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -575,8 +575,10 @@ public interface MRJobConfig { public static final String MAPRED_ADMIN_USER_ENV = "mapreduce.admin.user.env"; - public static final String DEFAULT_MAPRED_ADMIN_USER_ENV = - "LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native"; + public final String DEFAULT_MAPRED_ADMIN_USER_ENV = + Shell.WINDOWS ? + "PATH=%PATH%;%HADOOP_COMMON_HOME%\\bin": + "LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native"; public static final String WORKDIR = "work"; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 29facecb6b..c897472601 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -183,11 +183,16 @@ mapreduce.admin.user.env - LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native - Expert: Additional execution environment entries for + + + Expert: Additional execution environment entries for map and reduce task processes. This is not an additive property. 
You must preserve the original value if you want your map and reduce tasks to have access to native libraries (compression, etc). + When this value is empty, the command to set execution + environment will be OS dependent: + For linux, use LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native. + For windows, use PATH = %PATH%;%HADOOP_COMMON_HOME%\\bin. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRChildTask.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRChildTask.java index 8a7504f84f..bab95ee600 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRChildTask.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRChildTask.java @@ -23,7 +23,9 @@ import static org.junit.Assert.fail; import java.io.*; +import java.util.HashMap; import java.util.Iterator; +import java.util.Map; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -46,6 +48,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.mapreduce.v2.MiniMRYarnCluster; import org.apache.hadoop.util.Shell; +import org.apache.hadoop.yarn.util.Apps; /** * Class to test mapred task's @@ -107,6 +110,29 @@ public void configure(JobConf job) { } } } + + /** + * Map class which checks if hadoop lib location + * is in the execution path + */ + public static class ExecutionEnvCheckMapClass extends MapReduceBase + implements Mapper { + public void map (LongWritable key, Text value, + OutputCollector output, + Reporter reporter) throws IOException { + } + public void configure(JobConf job) { + String executionEnvPathVariable = System.getenv(Shell.WINDOWS ? 
"PATH" + : "LD_LIBRARY_PATH"); + String hadoopHome = System.getenv("HADOOP_COMMON_HOME"); + if (hadoopHome == null) { + hadoopHome = ""; + } + String hadoopLibLocation = hadoopHome + + (Shell.WINDOWS ? "\\bin" : "/lib/native"); + assertTrue(executionEnvPathVariable.contains(hadoopLibLocation)); + } + } // configure a job private void configure(JobConf conf, Path inDir, Path outDir, String input, @@ -153,8 +179,6 @@ public void launchTest(JobConf conf, Path outDir, String input) throws IOException, InterruptedException, ClassNotFoundException { - configure(conf, inDir, outDir, input, - MapClass.class, IdentityReducer.class); FileSystem outFs = outDir.getFileSystem(conf); @@ -359,7 +383,8 @@ public void testTaskTempDir(){ Path inDir = new Path("testing/wc/input"); Path outDir = new Path("testing/wc/output"); String input = "The input"; - + configure(conf, inDir, outDir, input, + MapClass.class, IdentityReducer.class); launchTest(conf, inDir, outDir, input); } catch(Exception e) { @@ -369,6 +394,66 @@ public void testTaskTempDir(){ } } + /** + * To test OS dependent setting of default execution path for a MapRed task. + * Mainly that we can use MRJobConfig.DEFAULT_MAPRED_ADMIN_USER_ENV to set - + * for WINDOWS: %HADOOP_COMMON_HOME%\bin is expected to be included in PATH - for + * Linux: $HADOOP_COMMON_HOME/lib/native is expected to be included in + * LD_LIBRARY_PATH + */ + @Test + public void testMapRedExecutionEnv() { + // test if the env variable can be set + try { + // Application environment + Map environment = new HashMap(); + String setupHadoopHomeCommand = Shell.WINDOWS ? 
+ "HADOOP_COMMON_HOME=C:\\fake\\PATH\\to\\hadoop\\common\\home" : + "HADOOP_COMMON_HOME=/fake/path/to/hadoop/common/home"; + Apps.setEnvFromInputString(environment, setupHadoopHomeCommand); + + // Add the env variables passed by the admin + Apps.setEnvFromInputString(environment, conf.get( + MRJobConfig.MAPRED_ADMIN_USER_ENV, + MRJobConfig.DEFAULT_MAPRED_ADMIN_USER_ENV)); + + String executionPaths = environment.get( + Shell.WINDOWS ? "PATH" : "LD_LIBRARY_PATH"); + String toFind = Shell.WINDOWS ? + "C:\\fake\\PATH\\to\\hadoop\\common\\home\\bin" : + "/fake/path/to/hadoop/common/home/lib/native"; + + // Ensure execution PATH/LD_LIBRARY_PATH set up pointing to hadoop lib + assertTrue("execution path does not include the hadoop lib location " + + toFind, executionPaths.contains(toFind)); + } catch (Exception e) { + e.printStackTrace(); + fail("Exception in testing execution environment for MapReduce task"); + tearDown(); + } + + // now launch a mapreduce job to ensure that the child + // also gets the configured setting for hadoop lib + try { + + JobConf conf = new JobConf(mr.getConfig()); + // initialize input, output directories + Path inDir = new Path("input"); + Path outDir = new Path("output"); + String input = "The input"; + + // set config to use the ExecutionEnvCheckMapClass map class + configure(conf, inDir, outDir, input, + ExecutionEnvCheckMapClass.class, IdentityReducer.class); + launchTest(conf, inDir, outDir, input); + + } catch(Exception e) { + e.printStackTrace(); + fail("Exception in testing propagation of env setting to child task"); + tearDown(); + } + } + /** * Test to test if the user set env variables reflect in the child * processes. Mainly