MAPREDUCE-4740. only .jars can be added to the Distributed Cache classpath. Contributed by Robert Joseph Evans

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1401054 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jason Darrell Lowe 2012-10-22 19:50:23 +00:00
parent 75cdb5bb49
commit 49beea562a
3 changed files with 112 additions and 16 deletions

View File

@ -599,6 +599,9 @@ Release 0.23.5 - UNRELEASED
MAPREDUCE-4733. Reducer can fail to make progress during shuffle if too many
reducers complete consecutively. (Jason Lowe via vinodkv)
MAPREDUCE-4740. only .jars can be added to the Distributed Cache
classpath. (Robert Joseph Evans via jlowe)
Release 0.23.4 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -191,6 +191,7 @@ private static void setMRFrameworkClasspath(
// TODO: Remove duplicates.
}
@SuppressWarnings("deprecation")
public static void setClasspath(Map<String, String> environment,
Configuration conf) throws IOException {
boolean userClassesTakesPrecedence =
@ -218,11 +219,66 @@ public static void setClasspath(Map<String, String> environment,
environment,
Environment.CLASSPATH.name(),
Environment.PWD.$() + Path.SEPARATOR + "*");
// a * in the classpath will only find a .jar, so we need to filter out
// all .jars and add everything else
addToClasspathIfNotJar(DistributedCache.getFileClassPaths(conf),
DistributedCache.getCacheFiles(conf),
conf,
environment);
addToClasspathIfNotJar(DistributedCache.getArchiveClassPaths(conf),
DistributedCache.getCacheArchives(conf),
conf,
environment);
if (userClassesTakesPrecedence) {
MRApps.setMRFrameworkClasspath(environment, conf);
}
}
/**
* Add the paths to the classpath if they are not jars
* @param paths the paths to add to the classpath
* @param withLinks the corresponding paths that may have a link name in them
* @param conf used to resolve the paths
* @param environment the environment to update CLASSPATH in
* @throws IOException if there is an error resolving any of the paths.
*/
private static void addToClasspathIfNotJar(Path[] paths,
URI[] withLinks, Configuration conf,
Map<String, String> environment) throws IOException {
if (paths != null) {
HashMap<Path, String> linkLookup = new HashMap<Path, String>();
if (withLinks != null) {
for (URI u: withLinks) {
Path p = new Path(u);
FileSystem remoteFS = p.getFileSystem(conf);
p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(),
remoteFS.getWorkingDirectory()));
String name = (null == u.getFragment())
? p.getName() : u.getFragment();
if (!name.toLowerCase().endsWith(".jar")) {
linkLookup.put(p, name);
}
}
}
for (Path p : paths) {
FileSystem remoteFS = p.getFileSystem(conf);
p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(),
remoteFS.getWorkingDirectory()));
String name = linkLookup.get(p);
if (name == null) {
name = p.getName();
}
if(!name.toLowerCase().endsWith(".jar")) {
Apps.addToEnvironment(
environment,
Environment.CLASSPATH.name(),
Environment.PWD.$() + Path.SEPARATOR + name);
}
}
}
}
private static final String STAGING_CONSTANT = ".staging";
public static Path getStagingAreaDir(Configuration conf, String user) {
return new Path(conf.get(MRJobConfig.MR_AM_STAGING_DIR,
@ -261,8 +317,7 @@ public static void setupDistributedCache(
DistributedCache.getCacheArchives(conf),
parseTimeStamps(DistributedCache.getArchiveTimestamps(conf)),
getFileSizes(conf, MRJobConfig.CACHE_ARCHIVES_SIZES),
DistributedCache.getArchiveVisibilities(conf),
DistributedCache.getArchiveClassPaths(conf));
DistributedCache.getArchiveVisibilities(conf));
// Cache files
parseDistributedCacheArtifacts(conf,
@ -271,8 +326,7 @@ public static void setupDistributedCache(
DistributedCache.getCacheFiles(conf),
parseTimeStamps(DistributedCache.getFileTimestamps(conf)),
getFileSizes(conf, MRJobConfig.CACHE_FILES_SIZES),
DistributedCache.getFileVisibilities(conf),
DistributedCache.getFileClassPaths(conf));
DistributedCache.getFileVisibilities(conf));
}
private static String getResourceDescription(LocalResourceType type) {
@ -289,8 +343,8 @@ private static void parseDistributedCacheArtifacts(
Configuration conf,
Map<String, LocalResource> localResources,
LocalResourceType type,
URI[] uris, long[] timestamps, long[] sizes, boolean visibilities[],
Path[] pathsToPutOnClasspath) throws IOException {
URI[] uris, long[] timestamps, long[] sizes, boolean visibilities[])
throws IOException {
if (uris != null) {
// Sanity check
@ -304,15 +358,6 @@ private static void parseDistributedCacheArtifacts(
);
}
Map<String, Path> classPaths = new HashMap<String, Path>();
if (pathsToPutOnClasspath != null) {
for (Path p : pathsToPutOnClasspath) {
FileSystem remoteFS = p.getFileSystem(conf);
p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(),
remoteFS.getWorkingDirectory()));
classPaths.put(p.toUri().getPath().toString(), p);
}
}
for (int i = 0; i < uris.length; ++i) {
URI u = uris[i];
Path p = new Path(u);

View File

@ -18,6 +18,8 @@
package org.apache.hadoop.mapreduce.v2.util;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
@ -42,12 +44,36 @@
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;
import static org.mockito.Mockito.*;
public class TestMRApps {
private static File testWorkDir = null;
@BeforeClass
public static void setupTestDirs() throws IOException {
testWorkDir = new File("target", TestMRApps.class.getCanonicalName());
delete(testWorkDir);
testWorkDir.mkdirs();
testWorkDir = testWorkDir.getAbsoluteFile();
}
@AfterClass
public static void cleanupTestDirs() throws IOException {
if (testWorkDir != null) {
delete(testWorkDir);
}
}
private static void delete(File dir) throws IOException {
Path p = new Path("file://"+dir.getAbsolutePath());
Configuration conf = new Configuration();
FileSystem fs = p.getFileSystem(conf);
fs.delete(p, true);
}
@Test public void testJobIDtoString() {
JobId jid = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(JobId.class);
@ -154,6 +180,28 @@ public class TestMRApps {
}
assertTrue(environment.get("CLASSPATH").contains(mrAppClasspath));
}
@Test public void testSetClasspathWithArchives () throws IOException {
File testTGZ = new File(testWorkDir, "test.tgz");
FileOutputStream out = new FileOutputStream(testTGZ);
out.write(0);
out.close();
Job job = Job.getInstance();
Configuration conf = job.getConfiguration();
conf.set(MRJobConfig.CLASSPATH_ARCHIVES, "file://"
+ testTGZ.getAbsolutePath());
conf.set(MRJobConfig.CACHE_ARCHIVES, "file://"
+ testTGZ.getAbsolutePath() + "#testTGZ");
Map<String, String> environment = new HashMap<String, String>();
MRApps.setClasspath(environment, conf);
assertTrue(environment.get("CLASSPATH").startsWith("$PWD:"));
String confClasspath = job.getConfiguration().get(YarnConfiguration.YARN_APPLICATION_CLASSPATH);
if (confClasspath != null) {
confClasspath = confClasspath.replaceAll(",\\s*", ":").trim();
}
assertTrue(environment.get("CLASSPATH").contains(confClasspath));
assertTrue(environment.get("CLASSPATH").contains("testTGZ"));
}
@Test public void testSetClasspathWithUserPrecendence() {
Configuration conf = new Configuration();