MAPREDUCE-4421. Run MapReduce framework via the distributed cache. Contributed by Jason Lowe
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1528237 13f79535-47bb-0310-9956-ffa450edef68
commit db06f1bcb9
parent b2a7811fe7
@@ -162,6 +162,8 @@ Release 2.3.0 - UNRELEASED
     MAPREDUCE-434. LocalJobRunner limited to single reducer (Sandy Ryza and
     Aaron Kimball via Sandy Ryza)
 
+    MAPREDUCE-4421. Run MapReduce framework via the distributed cache (jlowe)
+
   OPTIMIZATIONS
 
     MAPREDUCE-5484. YarnChild unnecessarily loads job conf twice (Sandy Ryza)
@@ -21,6 +21,7 @@ package org.apache.hadoop.mapreduce.v2.util;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URI;
+import java.net.URISyntaxException;
 import java.security.AccessController;
 import java.security.PrivilegedActionException;
 import java.security.PrivilegedExceptionAction;
@@ -133,6 +134,30 @@ public class MRApps extends Apps {
     return TaskAttemptStateUI.valueOf(attemptStateStr);
   }
 
+  // gets the base name of the MapReduce framework or null if no
+  // framework was configured
+  private static String getMRFrameworkName(Configuration conf) {
+    String frameworkName = null;
+    String framework =
+        conf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, "");
+    if (!framework.isEmpty()) {
+      URI uri;
+      try {
+        uri = new URI(framework);
+      } catch (URISyntaxException e) {
+        throw new IllegalArgumentException("Unable to parse '" + framework
+            + "' as a URI, check the setting for "
+            + MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
+      }
+
+      frameworkName = uri.getFragment();
+      if (frameworkName == null) {
+        frameworkName = new Path(uri).getName();
+      }
+    }
+    return frameworkName;
+  }
+
   private static void setMRFrameworkClasspath(
       Map<String, String> environment, Configuration conf) throws IOException {
     // Propagate the system classpath when using the mini cluster
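
To make the name derivation above concrete: the URI fragment wins when an alias is given, otherwise the archive's base name is used. A standalone sketch (the demo class is hypothetical and approximates new Path(uri).getName() with plain string handling so it runs without Hadoop on the classpath):

  import java.net.URI;
  import java.net.URISyntaxException;

  public class FrameworkNameDemo {
    // Mirrors getMRFrameworkName(): URI fragment if present, else base name.
    static String frameworkName(String framework) throws URISyntaxException {
      URI uri = new URI(framework);
      String name = uri.getFragment();
      if (name == null) {
        String path = uri.getPath();
        name = path.substring(path.lastIndexOf('/') + 1);
      }
      return name;
    }

    public static void main(String[] args) throws URISyntaxException {
      // With an alias fragment, the alias is the framework name.
      System.out.println(frameworkName(
          "hdfs:/mapred/framework/hadoop-mapreduce-2.1.1.tar.gz#mrframework"));
      // prints: mrframework

      // Without a fragment, the archive's base name is used.
      System.out.println(frameworkName(
          "hdfs:/mapred/framework/hadoop-mapreduce-2.1.1.tar.gz"));
      // prints: hadoop-mapreduce-2.1.1.tar.gz
    }
  }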
@@ -141,18 +166,33 @@ public class MRApps extends Apps {
           System.getProperty("java.class.path"));
     }
 
-    // Add standard Hadoop classes
-    for (String c : conf.getStrings(
-        YarnConfiguration.YARN_APPLICATION_CLASSPATH,
-        YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
-      Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c
-          .trim());
+    // if the framework is specified then only use the MR classpath
+    String frameworkName = getMRFrameworkName(conf);
+    if (frameworkName == null) {
+      // Add standard Hadoop classes
+      for (String c : conf.getStrings(
+          YarnConfiguration.YARN_APPLICATION_CLASSPATH,
+          YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH)) {
+        Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c
+            .trim());
+      }
     }
+
+    boolean foundFrameworkInClasspath = (frameworkName == null);
     for (String c : conf.getStrings(
         MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH,
         MRJobConfig.DEFAULT_MAPREDUCE_APPLICATION_CLASSPATH)) {
       Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c
           .trim());
+      if (!foundFrameworkInClasspath) {
+        foundFrameworkInClasspath = c.contains(frameworkName);
+      }
+    }
+
+    if (!foundFrameworkInClasspath) {
+      throw new IllegalArgumentException(
+          "Could not locate MapReduce framework name '" + frameworkName
+          + "' in " + MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH);
     }
     // TODO: Remove duplicates.
   }
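
A quick illustration of the substring check above, reusing the hypothetical names from the unit test added below: the framework name merely has to appear inside some entry of mapreduce.application.classpath.

  public class FrameworkClasspathCheckDemo {
    public static void main(String[] args) {
      String frameworkName = "some-framework-name";          // hypothetical
      String[] classpath = { "some-framework-name/*.jar" };  // hypothetical

      boolean found = false;
      for (String c : classpath) {
        found = found || c.contains(frameworkName);
      }
      System.out.println(found);
      // true, so setMRFrameworkClasspath would not throw; an empty or
      // unrelated classpath would trigger the IllegalArgumentException.
    }
  }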
@@ -282,7 +282,46 @@ public class TestMRApps {
     assertEquals("MAPREDUCE_JOB_CLASSLOADER true, but job.jar is not in the app"
         + " classpath!", expectedAppClasspath, appCp);
   }
 
+  @Test (timeout = 3000000)
+  public void testSetClasspathWithFramework() throws IOException {
+    final String FRAMEWORK_NAME = "some-framework-name";
+    final String FRAMEWORK_PATH = "some-framework-path#" + FRAMEWORK_NAME;
+    Configuration conf = new Configuration();
+    conf.set(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, FRAMEWORK_PATH);
+    Map<String, String> env = new HashMap<String, String>();
+    try {
+      MRApps.setClasspath(env, conf);
+      fail("Failed to catch framework path set without classpath change");
+    } catch (IllegalArgumentException e) {
+      assertTrue("Unexpected IllegalArgumentException",
+          e.getMessage().contains("Could not locate MapReduce framework name '"
+              + FRAMEWORK_NAME + "'"));
+    }
+
+    env.clear();
+    final String FRAMEWORK_CLASSPATH = FRAMEWORK_NAME + "/*.jar";
+    conf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, FRAMEWORK_CLASSPATH);
+    MRApps.setClasspath(env, conf);
+    final String stdClasspath = StringUtils.join(File.pathSeparator,
+        Arrays.asList("job.jar/job.jar", "job.jar/classes/", "job.jar/lib/*",
+            ApplicationConstants.Environment.PWD.$() + "/*"));
+    String expectedClasspath = StringUtils.join(File.pathSeparator,
+        Arrays.asList(ApplicationConstants.Environment.PWD.$(),
+            FRAMEWORK_CLASSPATH, stdClasspath));
+    assertEquals("Incorrect classpath with framework and no user precedence",
+        expectedClasspath, env.get("CLASSPATH"));
+
+    env.clear();
+    conf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, true);
+    MRApps.setClasspath(env, conf);
+    expectedClasspath = StringUtils.join(File.pathSeparator,
+        Arrays.asList(ApplicationConstants.Environment.PWD.$(),
+            stdClasspath, FRAMEWORK_CLASSPATH));
+    assertEquals("Incorrect classpath with framework and user precedence",
+        expectedClasspath, env.get("CLASSPATH"));
+  }
+
   @Test (timeout = 30000)
   public void testSetupDistributedCacheEmpty() throws IOException {
     Configuration conf = new Configuration();
@@ -39,6 +39,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
@@ -340,11 +341,12 @@ class JobSubmitter {
 
     //validate the jobs output specs
     checkSpecs(job);
 
-    Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster,
-        job.getConfiguration());
-    //configure the command line options correctly on the submitting dfs
     Configuration conf = job.getConfiguration();
+    addMRFrameworkToDistributedCache(conf);
+
+    Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf);
+    //configure the command line options correctly on the submitting dfs
     InetAddress ip = InetAddress.getLocalHost();
     if (ip != null) {
       submitHostAddress = ip.getHostAddress();
@@ -602,7 +604,6 @@ class JobSubmitter {
   }
 
   //get secret keys and tokens and store them into TokenCache
-  @SuppressWarnings("unchecked")
   private void populateTokenCache(Configuration conf, Credentials credentials)
       throws IOException{
     readTokensFromFiles(conf, credentials);
@@ -618,4 +619,41 @@ class JobSubmitter {
       TokenCache.obtainTokensForNamenodes(credentials, ps, conf);
     }
   }
+
+  @SuppressWarnings("deprecation")
+  private static void addMRFrameworkToDistributedCache(Configuration conf)
+      throws IOException {
+    String framework =
+        conf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, "");
+    if (!framework.isEmpty()) {
+      URI uri;
+      try {
+        uri = new URI(framework);
+      } catch (URISyntaxException e) {
+        throw new IllegalArgumentException("Unable to parse '" + framework
+            + "' as a URI, check the setting for "
+            + MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
+      }
+
+      String linkedName = uri.getFragment();
+
+      // resolve any symlinks in the URI path so using a "current" symlink
+      // to point to a specific version shows the specific version
+      // in the distributed cache configuration
+      FileSystem fs = FileSystem.get(conf);
+      Path frameworkPath = fs.makeQualified(
+          new Path(uri.getScheme(), uri.getAuthority(), uri.getPath()));
+      FileContext fc = FileContext.getFileContext(frameworkPath.toUri(), conf);
+      frameworkPath = fc.resolvePath(frameworkPath);
+      uri = frameworkPath.toUri();
+      try {
+        uri = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(),
+            null, linkedName);
+      } catch (URISyntaxException e) {
+        throw new IllegalArgumentException(e);
+      }
+
+      DistributedCache.addCacheArchive(uri, conf);
+    }
+  }
 }
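
One subtlety worth illustrating: FileContext.resolvePath() operates on paths, so the configured alias fragment is carried separately and re-attached after symlink resolution. A minimal standalone sketch (the "current" symlink and its target are hypothetical; real resolution happens against the filesystem):

  import java.net.URI;
  import java.net.URISyntaxException;

  public class FragmentRebuildDemo {
    public static void main(String[] args) throws URISyntaxException {
      URI configured = new URI("hdfs:/mapred/framework/current#mrframework");
      String linkedName = configured.getFragment();  // "mrframework"

      // Pretend symlink resolution mapped "current" to a concrete version.
      URI resolved = new URI("hdfs", null,
          "/mapred/framework/hadoop-mapreduce-2.1.1.tar.gz", null, null);

      // Rebuild the URI so the localized name stays stable across versions.
      URI cacheUri = new URI(resolved.getScheme(), resolved.getAuthority(),
          resolved.getPath(), null, linkedName);
      System.out.println(cacheUri);
      // prints: hdfs:/mapred/framework/hadoop-mapreduce-2.1.1.tar.gz#mrframework
    }
  }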
@@ -649,6 +649,12 @@ public interface MRJobConfig {
   public static final String MAPREDUCE_APPLICATION_CLASSPATH =
       "mapreduce.application.classpath";
 
+  /**
+   * Path to MapReduce framework archive
+   */
+  public static final String MAPREDUCE_APPLICATION_FRAMEWORK_PATH =
+      "mapreduce.application.framework.path";
+
   /**
    * Default CLASSPATH for all YARN MapReduce applications.
    */
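
For reference, a tiny sketch connecting the new constant to the XML property defined in the next hunk (the demo class is hypothetical):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.mapreduce.MRJobConfig;

  public class FrameworkPathConfigDemo {
    public static void main(String[] args) {
      // The constant is just the mapred-default.xml property name.
      System.out.println(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
      // prints: mapreduce.application.framework.path

      // Unset by default, so JobSubmitter's isEmpty() guard skips the upload.
      Configuration conf = new Configuration();
      System.out.println(conf.get(
          MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, "").isEmpty());
      // prints: true
    }
  }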
@@ -1024,11 +1024,31 @@
 
 <property>
   <description>CLASSPATH for MR applications. A comma-separated list
-  of CLASSPATH entries</description>
+  of CLASSPATH entries. If mapreduce.application.framework is set then this
+  must specify the appropriate classpath for that archive, and the name of
+  the archive must be present in the classpath.</description>
   <name>mapreduce.application.classpath</name>
   <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
 </property>
 
+<property>
+  <description>Path to the MapReduce framework archive. If set, the framework
+  archive will automatically be distributed along with the job, and this
+  path would normally reside in a public location in an HDFS filesystem. As
+  with distributed cache files, this can be a URL with a fragment specifying
+  the alias to use for the archive name. For example,
+  hdfs:/mapred/framework/hadoop-mapreduce-2.1.1.tar.gz#mrframework would
+  alias the localized archive as "mrframework".
+
+  Note that mapreduce.application.classpath must include the appropriate
+  classpath for the specified framework. The base name of the archive, or
+  alias of the archive if an alias is used, must appear in the specified
+  classpath.
+  </description>
+  <name>mapreduce.application.framework.path</name>
+  <value></value>
+</property>
+
 <property>
   <name>mapreduce.job.classloader</name>
   <value>false</value>
@@ -0,0 +1,120 @@
+~~ Licensed under the Apache License, Version 2.0 (the "License");
+~~ you may not use this file except in compliance with the License.
+~~ You may obtain a copy of the License at
+~~
+~~   http://www.apache.org/licenses/LICENSE-2.0
+~~
+~~ Unless required by applicable law or agreed to in writing, software
+~~ distributed under the License is distributed on an "AS IS" BASIS,
+~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+~~ See the License for the specific language governing permissions and
+~~ limitations under the License. See accompanying LICENSE file.
+
+  ---
+  Hadoop Map Reduce Next Generation-${project.version} - Distributed Cache Deploy
+  ---
+  ---
+  ${maven.build.timestamp}
+
+Hadoop MapReduce Next Generation - Distributed Cache Deploy
+
+  \[ {{{./index.html}Go Back}} \]
+
+* Introduction
+
+  The MapReduce application framework has rudimentary support for deploying a
+  new version of the MapReduce framework via the distributed cache. By setting
+  the appropriate configuration properties, users can run a different version
+  of MapReduce than the one initially deployed to the cluster. For example,
+  cluster administrators can place multiple versions of MapReduce in HDFS and
+  configure <<<mapred-site.xml>>> to specify which version jobs will use by
+  default. This allows the administrators to perform a rolling upgrade of the
+  MapReduce framework under certain conditions.
+
+* Preconditions and Limitations
+
+  The support for deploying the MapReduce framework via the distributed cache
+  currently does not address the job client code used to submit and query
+  jobs. It also does not address the <<<ShuffleHandler>>> code that runs as an
+  auxiliary service within each NodeManager. As a result the following
+  limitations apply to MapReduce versions that can be successfully deployed via
+  the distributed cache in a rolling upgrade fashion:
+
+  * The MapReduce version must be compatible with the job client code used to
+    submit and query jobs. If it is incompatible then the job client must be
+    upgraded separately on any node from which jobs using the new MapReduce
+    version will be submitted or queried.
+
+  * The MapReduce version must be compatible with the configuration files used
+    by the job client submitting the jobs. If it is incompatible with that
+    configuration (e.g.: a new property must be set or an existing property
+    value changed) then the configuration must be updated first.
+
+  * The MapReduce version must be compatible with the <<<ShuffleHandler>>>
+    version running on the nodes in the cluster. If it is incompatible then the
+    new <<<ShuffleHandler>>> code must be deployed to all the nodes in the
+    cluster, and the NodeManagers must be restarted to pick up the new
+    <<<ShuffleHandler>>> code.
+
+* Deploying a New MapReduce Version via the Distributed Cache
+
+  Deploying a new MapReduce version consists of three steps:
+
+  [[1]] Upload the MapReduce archive to a location that can be accessed by the
+  job submission client. Ideally the archive should be on the cluster's default
+  filesystem at a publicly-readable path. See the archive location discussion
+  below for more details.
+
+  [[2]] Configure <<<mapreduce.application.framework.path>>> to point to the
+  location where the archive is located. As when specifying distributed cache
+  files for a job, this is a URL that also supports creating an alias for the
+  archive if a URL fragment is specified. For example,
+  <<<hdfs:/mapred/framework/hadoop-mapreduce-2.1.1.tar.gz#mrframework>>> will
+  be localized as <<<mrframework>>> rather than
+  <<<hadoop-mapreduce-2.1.1.tar.gz>>>.
+
+  [[3]] Configure <<<mapreduce.application.classpath>>> to set the proper
+  classpath to use with the MapReduce archive configured above. NOTE: An error
+  occurs if <<<mapreduce.application.framework.path>>> is configured but
+  <<<mapreduce.application.classpath>>> does not reference the base name of the
+  archive path or the alias if an alias was specified.
+
+** Location of the MapReduce Archive and How It Affects Job Performance
+
+  Note that the location of the MapReduce archive can be critical to job
+  submission and job startup performance. If the archive is not located on the
+  cluster's default filesystem then it will be copied to the job staging
+  directory for each job and localized to each node where the job's tasks
+  run. This will slow down job submission and task startup performance.
+
+  If the archive is located on the default filesystem then the job client will
+  not upload the archive to the job staging directory for each job
+  submission. However if the archive path is not readable by all cluster users
+  then the archive will be localized separately for each user on each node
+  where tasks execute. This can cause unnecessary duplication in the
+  distributed cache.
+
+  When working with a large cluster it can be important to increase the
+  replication factor of the archive to increase its availability. This will
+  spread the load when the nodes in the cluster localize the archive for the
+  first time.
+
+* MapReduce Archives and Classpath Configuration
+
+  Setting a proper classpath for the MapReduce archive depends upon the
+  composition of the archive and whether it has any additional dependencies.
+  For example, the archive can contain not only the MapReduce jars but also the
+  necessary YARN, HDFS, and Hadoop Common jars and all other dependencies. In
+  that case, <<<mapreduce.application.classpath>>> would be configured to
+  something like the following example, where the archive basename is
+  hadoop-mapreduce-2.1.1.tar.gz and the archive is organized internally similar
+  to the standard Hadoop distribution archive:
+
+  <<<$HADOOP_CONF_DIR,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/mapreduce/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/mapreduce/lib/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/common/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/common/lib/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/yarn/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/yarn/lib/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/hdfs/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/hdfs/lib/*>>>
+
+  Another possible approach is to have the archive consist of just the
+  MapReduce jars and have the remaining dependencies picked up from the Hadoop
+  distribution installed on the nodes. In that case, the above example would
+  change to something like the following:
+
+  <<<$HADOOP_CONF_DIR,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/mapreduce/*,$PWD/hadoop-mapreduce-2.1.1.tar.gz/hadoop-mapreduce-2.1.1/share/hadoop/mapreduce/lib/*,$HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,$HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*>>>
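
Tying the three steps together, a hedged end-to-end sketch of a submission that picks the framework up from the distributed cache. The archive path and alias come from the examples above; the classpath layout assumes the tarball unpacks to a hadoop-mapreduce-2.1.1 top-level directory, which is an assumption about the archive, not something this change mandates:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.mapreduce.Job;

  public class FrameworkJobDemo {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      // Step 2: point at the uploaded archive; the #mrframework fragment
      // controls the localized directory name.
      conf.set("mapreduce.application.framework.path",
          "hdfs:/mapred/framework/hadoop-mapreduce-2.1.1.tar.gz#mrframework");
      // Step 3: the classpath must mention the alias ("mrframework"),
      // otherwise submission fails with IllegalArgumentException.
      conf.set("mapreduce.application.classpath",
          "$HADOOP_CONF_DIR,"
          + "$PWD/mrframework/hadoop-mapreduce-2.1.1/share/hadoop/mapreduce/*,"
          + "$PWD/mrframework/hadoop-mapreduce-2.1.1/share/hadoop/mapreduce/lib/*");

      Job job = Job.getInstance(conf, "framework-from-distcache-demo");
      // ... configure mapper/reducer/input/output as usual, then:
      // job.waitForCompletion(true);
    }
  }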
@@ -86,6 +86,7 @@
     <item name="Compatibilty between Hadoop 1.x and Hadoop 2.x" href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html"/>
     <item name="Encrypted Shuffle" href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html"/>
     <item name="Pluggable Shuffle/Sort" href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html"/>
+    <item name="Distributed Cache Deploy" href="hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html"/>
   </menu>
 
   <menu name="YARN" inherit="top">