MAPREDUCE-3426. Fixed MR AM in uber mode to write map intermediate outputs in the correct directory to work properly in secure mode. Contributed by Hitesh Shah.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1213987 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3954a2fb1c
commit
b7ae5a6cb7
@ -282,6 +282,10 @@ Release 0.23.1 - Unreleased
|
|||||||
MAPREDUCE-3542. Support "FileSystemCounter" legacy counter group name for
|
MAPREDUCE-3542. Support "FileSystemCounter" legacy counter group name for
|
||||||
compatibility. (tomwhite)
|
compatibility. (tomwhite)
|
||||||
|
|
||||||
|
MAPREDUCE-3426. Fixed MR AM in uber mode to write map intermediate outputs
|
||||||
|
in the correct directory to work properly in secure mode. (Hitesh Shah via
|
||||||
|
vinodkv)
|
||||||
|
|
||||||
Release 0.23.0 - 2011-11-01
|
Release 0.23.0 - 2011-11-01
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
@ -22,20 +22,19 @@
|
|||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.net.URI;
|
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.concurrent.BlockingQueue;
|
import java.util.concurrent.BlockingQueue;
|
||||||
import java.util.concurrent.LinkedBlockingQueue;
|
import java.util.concurrent.LinkedBlockingQueue;
|
||||||
import java.util.concurrent.ThreadPoolExecutor;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.fs.FSError;
|
import org.apache.hadoop.fs.FSError;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.FileContext;
|
import org.apache.hadoop.fs.FileContext;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
||||||
|
import org.apache.hadoop.mapreduce.JobContext;
|
||||||
import org.apache.hadoop.mapreduce.JobCounter;
|
import org.apache.hadoop.mapreduce.JobCounter;
|
||||||
import org.apache.hadoop.mapreduce.MRConfig;
|
import org.apache.hadoop.mapreduce.MRConfig;
|
||||||
import org.apache.hadoop.mapreduce.TypeConverter;
|
import org.apache.hadoop.mapreduce.TypeConverter;
|
||||||
@ -47,13 +46,12 @@
|
|||||||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
|
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
|
||||||
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
|
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
|
||||||
import org.apache.hadoop.mapreduce.v2.app.job.Job;
|
import org.apache.hadoop.mapreduce.v2.app.job.Job;
|
||||||
import org.apache.hadoop.mapreduce.v2.app.job.Task;
|
|
||||||
import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt;
|
|
||||||
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher;
|
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher;
|
||||||
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent;
|
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent;
|
||||||
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent;
|
import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.hadoop.yarn.YarnException;
|
import org.apache.hadoop.yarn.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.api.ApplicationConstants;
|
||||||
import org.apache.hadoop.yarn.service.AbstractService;
|
import org.apache.hadoop.yarn.service.AbstractService;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -80,7 +78,10 @@ public LocalContainerLauncher(AppContext context,
|
|||||||
super(LocalContainerLauncher.class.getName());
|
super(LocalContainerLauncher.class.getName());
|
||||||
this.context = context;
|
this.context = context;
|
||||||
this.umbilical = umbilical;
|
this.umbilical = umbilical;
|
||||||
// umbilical: MRAppMaster creates (taskAttemptListener), passes to us (TODO/FIXME: pointless to use RPC to talk to self; should create LocalTaskAttemptListener or similar: implement umbilical protocol but skip RPC stuff)
|
// umbilical: MRAppMaster creates (taskAttemptListener), passes to us
|
||||||
|
// (TODO/FIXME: pointless to use RPC to talk to self; should create
|
||||||
|
// LocalTaskAttemptListener or similar: implement umbilical protocol
|
||||||
|
// but skip RPC stuff)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
curFC = FileContext.getFileContext(curDir.toURI());
|
curFC = FileContext.getFileContext(curDir.toURI());
|
||||||
@ -152,7 +153,6 @@ public void handle(ContainerLauncherEvent event) {
|
|||||||
* ]]
|
* ]]
|
||||||
* - runs Task (runSubMap() or runSubReduce())
|
* - runs Task (runSubMap() or runSubReduce())
|
||||||
* - TA can safely send TA_UPDATE since in RUNNING state
|
* - TA can safely send TA_UPDATE since in RUNNING state
|
||||||
* [modulo possible TA-state-machine race noted below: CHECK (TODO)]
|
|
||||||
*/
|
*/
|
||||||
private class SubtaskRunner implements Runnable {
|
private class SubtaskRunner implements Runnable {
|
||||||
|
|
||||||
@ -162,6 +162,7 @@ private class SubtaskRunner implements Runnable {
|
|||||||
SubtaskRunner() {
|
SubtaskRunner() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
ContainerLauncherEvent event = null;
|
ContainerLauncherEvent event = null;
|
||||||
@ -183,7 +184,7 @@ public void run() {
|
|||||||
|
|
||||||
ContainerRemoteLaunchEvent launchEv =
|
ContainerRemoteLaunchEvent launchEv =
|
||||||
(ContainerRemoteLaunchEvent)event;
|
(ContainerRemoteLaunchEvent)event;
|
||||||
TaskAttemptId attemptID = launchEv.getTaskAttemptID(); //FIXME: can attemptID ever be null? (only if retrieved over umbilical?)
|
TaskAttemptId attemptID = launchEv.getTaskAttemptID();
|
||||||
|
|
||||||
Job job = context.getAllJobs().get(attemptID.getTaskId().getJobId());
|
Job job = context.getAllJobs().get(attemptID.getTaskId().getJobId());
|
||||||
int numMapTasks = job.getTotalMaps();
|
int numMapTasks = job.getTotalMaps();
|
||||||
@ -204,7 +205,6 @@ public void run() {
|
|||||||
// port number is set to -1 in this case.
|
// port number is set to -1 in this case.
|
||||||
context.getEventHandler().handle(
|
context.getEventHandler().handle(
|
||||||
new TaskAttemptContainerLaunchedEvent(attemptID, -1));
|
new TaskAttemptContainerLaunchedEvent(attemptID, -1));
|
||||||
//FIXME: race condition here? or do we have same kind of lock on TA handler => MapTask can't send TA_UPDATE before TA_CONTAINER_LAUNCHED moves TA to RUNNING state? (probably latter)
|
|
||||||
|
|
||||||
if (numMapTasks == 0) {
|
if (numMapTasks == 0) {
|
||||||
doneWithMaps = true;
|
doneWithMaps = true;
|
||||||
@ -259,6 +259,7 @@ public void run() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
private void runSubtask(org.apache.hadoop.mapred.Task task,
|
private void runSubtask(org.apache.hadoop.mapred.Task task,
|
||||||
final TaskType taskType,
|
final TaskType taskType,
|
||||||
TaskAttemptId attemptID,
|
TaskAttemptId attemptID,
|
||||||
@ -270,6 +271,19 @@ private void runSubtask(org.apache.hadoop.mapred.Task task,
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
JobConf conf = new JobConf(getConfig());
|
JobConf conf = new JobConf(getConfig());
|
||||||
|
conf.set(JobContext.TASK_ID, task.getTaskID().toString());
|
||||||
|
conf.set(JobContext.TASK_ATTEMPT_ID, classicAttemptID.toString());
|
||||||
|
conf.setBoolean(JobContext.TASK_ISMAP, (taskType == TaskType.MAP));
|
||||||
|
conf.setInt(JobContext.TASK_PARTITION, task.getPartition());
|
||||||
|
conf.set(JobContext.ID, task.getJobID().toString());
|
||||||
|
|
||||||
|
// Use the AM's local dir env to generate the intermediate step
|
||||||
|
// output files
|
||||||
|
String[] localSysDirs = StringUtils.getTrimmedStrings(
|
||||||
|
System.getenv(ApplicationConstants.LOCAL_DIR_ENV));
|
||||||
|
conf.setStrings(MRConfig.LOCAL_DIR, localSysDirs);
|
||||||
|
LOG.info(MRConfig.LOCAL_DIR + " for uber task: "
|
||||||
|
+ conf.get(MRConfig.LOCAL_DIR));
|
||||||
|
|
||||||
// mark this as an uberized subtask so it can set task counter
|
// mark this as an uberized subtask so it can set task counter
|
||||||
// (longer-term/FIXME: could redefine as job counter and send
|
// (longer-term/FIXME: could redefine as job counter and send
|
||||||
@ -285,12 +299,12 @@ private void runSubtask(org.apache.hadoop.mapred.Task task,
|
|||||||
if (doneWithMaps) {
|
if (doneWithMaps) {
|
||||||
LOG.error("CONTAINER_REMOTE_LAUNCH contains a map task ("
|
LOG.error("CONTAINER_REMOTE_LAUNCH contains a map task ("
|
||||||
+ attemptID + "), but should be finished with maps");
|
+ attemptID + "), but should be finished with maps");
|
||||||
// throw new RuntimeException() (FIXME: what's appropriate here?)
|
throw new RuntimeException();
|
||||||
}
|
}
|
||||||
|
|
||||||
MapTask map = (MapTask)task;
|
MapTask map = (MapTask)task;
|
||||||
|
map.setConf(conf);
|
||||||
|
|
||||||
//CODE-REVIEWER QUESTION: why not task.getConf() or map.getConf() instead of conf? do we need Task's localizeConfiguration() run on this first?
|
|
||||||
map.run(conf, umbilical);
|
map.run(conf, umbilical);
|
||||||
|
|
||||||
if (renameOutputs) {
|
if (renameOutputs) {
|
||||||
@ -305,19 +319,23 @@ private void runSubtask(org.apache.hadoop.mapred.Task task,
|
|||||||
} else /* TaskType.REDUCE */ {
|
} else /* TaskType.REDUCE */ {
|
||||||
|
|
||||||
if (!doneWithMaps) {
|
if (!doneWithMaps) {
|
||||||
//check if event-queue empty? whole idea of counting maps vs. checking event queue is a tad wacky...but could enforce ordering (assuming no "lost events") at LocalMRAppMaster [CURRENT BUG(?): doesn't send reduce event until maps all done]
|
// check if event-queue empty? whole idea of counting maps vs.
|
||||||
|
// checking event queue is a tad wacky...but could enforce ordering
|
||||||
|
// (assuming no "lost events") at LocalMRAppMaster [CURRENT BUG(?):
|
||||||
|
// doesn't send reduce event until maps all done]
|
||||||
LOG.error("CONTAINER_REMOTE_LAUNCH contains a reduce task ("
|
LOG.error("CONTAINER_REMOTE_LAUNCH contains a reduce task ("
|
||||||
+ attemptID + "), but not yet finished with maps");
|
+ attemptID + "), but not yet finished with maps");
|
||||||
// throw new RuntimeException() (FIXME) // or push reduce event back onto end of queue? (probably former)
|
throw new RuntimeException();
|
||||||
}
|
}
|
||||||
|
|
||||||
ReduceTask reduce = (ReduceTask)task;
|
|
||||||
|
|
||||||
// a.k.a. "mapreduce.jobtracker.address" in LocalJobRunner:
|
// a.k.a. "mapreduce.jobtracker.address" in LocalJobRunner:
|
||||||
// set framework name to local to make task local
|
// set framework name to local to make task local
|
||||||
conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
|
conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
|
||||||
conf.set(MRConfig.MASTER_ADDRESS, "local"); // bypass shuffle
|
conf.set(MRConfig.MASTER_ADDRESS, "local"); // bypass shuffle
|
||||||
|
|
||||||
|
ReduceTask reduce = (ReduceTask)task;
|
||||||
|
reduce.setConf(conf);
|
||||||
|
|
||||||
reduce.run(conf, umbilical);
|
reduce.run(conf, umbilical);
|
||||||
//relocalize(); // needed only if more than one reducer supported (is MAPREDUCE-434 fixed yet?)
|
//relocalize(); // needed only if more than one reducer supported (is MAPREDUCE-434 fixed yet?)
|
||||||
}
|
}
|
||||||
@ -334,18 +352,7 @@ private void runSubtask(org.apache.hadoop.mapred.Task task,
|
|||||||
try {
|
try {
|
||||||
if (task != null) {
|
if (task != null) {
|
||||||
// do cleanup for the task
|
// do cleanup for the task
|
||||||
// if (childUGI == null) { // no need to job into doAs block
|
task.taskCleanup(umbilical);
|
||||||
task.taskCleanup(umbilical);
|
|
||||||
// } else {
|
|
||||||
// final Task taskFinal = task;
|
|
||||||
// childUGI.doAs(new PrivilegedExceptionAction<Object>() {
|
|
||||||
// @Override
|
|
||||||
// public Object run() throws Exception {
|
|
||||||
// taskFinal.taskCleanup(umbilical);
|
|
||||||
// return null;
|
|
||||||
// }
|
|
||||||
// });
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
LOG.info("Exception cleaning up: "
|
LOG.info("Exception cleaning up: "
|
||||||
@ -354,51 +361,21 @@ private void runSubtask(org.apache.hadoop.mapred.Task task,
|
|||||||
// Report back any failures, for diagnostic purposes
|
// Report back any failures, for diagnostic purposes
|
||||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||||
exception.printStackTrace(new PrintStream(baos));
|
exception.printStackTrace(new PrintStream(baos));
|
||||||
// if (classicAttemptID != null) {
|
umbilical.reportDiagnosticInfo(classicAttemptID, baos.toString());
|
||||||
umbilical.reportDiagnosticInfo(classicAttemptID, baos.toString());
|
|
||||||
// }
|
|
||||||
throw new RuntimeException();
|
throw new RuntimeException();
|
||||||
|
|
||||||
} catch (Throwable throwable) {
|
} catch (Throwable throwable) {
|
||||||
LOG.fatal("Error running local (uberized) 'child' : "
|
LOG.fatal("Error running local (uberized) 'child' : "
|
||||||
+ StringUtils.stringifyException(throwable));
|
+ StringUtils.stringifyException(throwable));
|
||||||
// if (classicAttemptID != null) {
|
Throwable tCause = throwable.getCause();
|
||||||
Throwable tCause = throwable.getCause();
|
String cause = (tCause == null)
|
||||||
String cause = (tCause == null)
|
? throwable.getMessage()
|
||||||
? throwable.getMessage()
|
: StringUtils.stringifyException(tCause);
|
||||||
: StringUtils.stringifyException(tCause);
|
umbilical.fatalError(classicAttemptID, cause);
|
||||||
umbilical.fatalError(classicAttemptID, cause);
|
|
||||||
// }
|
|
||||||
throw new RuntimeException();
|
throw new RuntimeException();
|
||||||
|
|
||||||
} finally {
|
|
||||||
/*
|
|
||||||
FIXME: do we need to do any of this stuff? (guessing not since not in own JVM)
|
|
||||||
RPC.stopProxy(umbilical);
|
|
||||||
DefaultMetricsSystem.shutdown();
|
|
||||||
// Shutting down log4j of the child-vm...
|
|
||||||
// This assumes that on return from Task.run()
|
|
||||||
// there is no more logging done.
|
|
||||||
LogManager.shutdown();
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* FIXME: may not need renameMapOutputForReduce() anymore? TEST!
|
|
||||||
|
|
||||||
${local.dir}/usercache/$user/appcache/$appId/$contId/ == $cwd for containers;
|
|
||||||
contains launch_container.sh script, which, when executed, creates symlinks and
|
|
||||||
sets up env
|
|
||||||
"$local.dir"/usercache/$user/appcache/$appId/$contId/file.out
|
|
||||||
"$local.dir"/usercache/$user/appcache/$appId/$contId/file.out.idx (?)
|
|
||||||
"$local.dir"/usercache/$user/appcache/$appId/output/$taskId/ is where file.out* is moved after MapTask done
|
|
||||||
|
|
||||||
OHO! no further need for this at all? $taskId is unique per subtask
|
|
||||||
now => should work fine to leave alone. TODO: test with teragen or
|
|
||||||
similar
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Within the _local_ filesystem (not HDFS), all activity takes place within
|
* Within the _local_ filesystem (not HDFS), all activity takes place within
|
||||||
* a single subdir (${local.dir}/usercache/$user/appcache/$appId/$contId/),
|
* a single subdir (${local.dir}/usercache/$user/appcache/$appId/$contId/),
|
||||||
@ -409,14 +386,21 @@ private void runSubtask(org.apache.hadoop.mapred.Task task,
|
|||||||
* filenames instead of "file.out". (All of this is entirely internal,
|
* filenames instead of "file.out". (All of this is entirely internal,
|
||||||
* so there are no particular compatibility issues.)
|
* so there are no particular compatibility issues.)
|
||||||
*/
|
*/
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
private void renameMapOutputForReduce(JobConf conf, TaskAttemptId mapId,
|
private void renameMapOutputForReduce(JobConf conf, TaskAttemptId mapId,
|
||||||
MapOutputFile subMapOutputFile)
|
MapOutputFile subMapOutputFile)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
FileSystem localFs = FileSystem.getLocal(conf);
|
FileSystem localFs = FileSystem.getLocal(conf);
|
||||||
// move map output to reduce input
|
// move map output to reduce input
|
||||||
Path mapOut = subMapOutputFile.getOutputFile();
|
Path mapOut = subMapOutputFile.getOutputFile();
|
||||||
|
FileStatus mStatus = localFs.getFileStatus(mapOut);
|
||||||
Path reduceIn = subMapOutputFile.getInputFileForWrite(
|
Path reduceIn = subMapOutputFile.getInputFileForWrite(
|
||||||
TypeConverter.fromYarn(mapId).getTaskID(), localFs.getLength(mapOut));
|
TypeConverter.fromYarn(mapId).getTaskID(), mStatus.getLen());
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Renaming map output file for task attempt "
|
||||||
|
+ mapId.toString() + " from original location " + mapOut.toString()
|
||||||
|
+ " to destination " + reduceIn.toString());
|
||||||
|
}
|
||||||
if (!localFs.mkdirs(reduceIn.getParent())) {
|
if (!localFs.mkdirs(reduceIn.getParent())) {
|
||||||
throw new IOException("Mkdirs failed to create "
|
throw new IOException("Mkdirs failed to create "
|
||||||
+ reduceIn.getParent().toString());
|
+ reduceIn.getParent().toString());
|
||||||
@ -429,8 +413,7 @@ private void renameMapOutputForReduce(JobConf conf, TaskAttemptId mapId,
|
|||||||
* Also within the local filesystem, we need to restore the initial state
|
* Also within the local filesystem, we need to restore the initial state
|
||||||
* of the directory as much as possible. Compare current contents against
|
* of the directory as much as possible. Compare current contents against
|
||||||
* the saved original state and nuke everything that doesn't belong, with
|
* the saved original state and nuke everything that doesn't belong, with
|
||||||
* the exception of the renamed map outputs (see above).
|
* the exception of the renamed map outputs.
|
||||||
FIXME: do we really need to worry about renamed map outputs, or already moved to output dir on commit? if latter, fix comment
|
|
||||||
*
|
*
|
||||||
* Any jobs that go out of their way to rename or delete things from the
|
* Any jobs that go out of their way to rename or delete things from the
|
||||||
* local directory are considered broken and deserve what they get...
|
* local directory are considered broken and deserve what they get...
|
||||||
|
@ -236,6 +236,13 @@ public static List<String> getVMCommand(
|
|||||||
getTaskLogFile(TaskLog.LogName.PROFILE)
|
getTaskLogFile(TaskLog.LogName.PROFILE)
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
if (task.isMapTask()) {
|
||||||
|
vargs.add(conf.get(MRJobConfig.TASK_MAP_PROFILE_PARAMS, ""));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
vargs.add(conf.get(MRJobConfig.TASK_REDUCE_PROFILE_PARAMS, ""));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -156,6 +156,7 @@ public class MRAppMaster extends CompositeService {
|
|||||||
private OutputCommitter committer;
|
private OutputCommitter committer;
|
||||||
private JobEventDispatcher jobEventDispatcher;
|
private JobEventDispatcher jobEventDispatcher;
|
||||||
private boolean inRecovery = false;
|
private boolean inRecovery = false;
|
||||||
|
private SpeculatorEventDispatcher speculatorEventDispatcher;
|
||||||
|
|
||||||
private Job job;
|
private Job job;
|
||||||
private Credentials fsTokens = new Credentials(); // Filled during init
|
private Credentials fsTokens = new Credentials(); // Filled during init
|
||||||
@ -265,8 +266,9 @@ public void init(final Configuration conf) {
|
|||||||
addIfService(speculator);
|
addIfService(speculator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
speculatorEventDispatcher = new SpeculatorEventDispatcher(conf);
|
||||||
dispatcher.register(Speculator.EventType.class,
|
dispatcher.register(Speculator.EventType.class,
|
||||||
new SpeculatorEventDispatcher(conf));
|
speculatorEventDispatcher);
|
||||||
|
|
||||||
// service to allocate containers from RM (if non-uber) or to fake it (uber)
|
// service to allocate containers from RM (if non-uber) or to fake it (uber)
|
||||||
containerAllocator = createContainerAllocator(clientService, context);
|
containerAllocator = createContainerAllocator(clientService, context);
|
||||||
@ -386,7 +388,7 @@ public void handle(JobFinishEvent event) {
|
|||||||
// This will also send the final report to the ResourceManager
|
// This will also send the final report to the ResourceManager
|
||||||
LOG.info("Calling stop for all the services");
|
LOG.info("Calling stop for all the services");
|
||||||
stop();
|
stop();
|
||||||
|
|
||||||
// Send job-end notification
|
// Send job-end notification
|
||||||
try {
|
try {
|
||||||
LOG.info("Job end notification started for jobID : "
|
LOG.info("Job end notification started for jobID : "
|
||||||
@ -401,14 +403,14 @@ public void handle(JobFinishEvent event) {
|
|||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
LOG.warn("Graceful stop failed ", t);
|
LOG.warn("Graceful stop failed ", t);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cleanup staging directory
|
// Cleanup staging directory
|
||||||
try {
|
try {
|
||||||
cleanupStagingDir();
|
cleanupStagingDir();
|
||||||
} catch(IOException io) {
|
} catch(IOException io) {
|
||||||
LOG.warn("Failed to delete staging dir");
|
LOG.warn("Failed to delete staging dir");
|
||||||
}
|
}
|
||||||
|
|
||||||
//Bring the process down by force.
|
//Bring the process down by force.
|
||||||
//Not needed after HADOOP-7140
|
//Not needed after HADOOP-7140
|
||||||
LOG.info("Exiting MR AppMaster..GoodBye!");
|
LOG.info("Exiting MR AppMaster..GoodBye!");
|
||||||
@ -790,10 +792,6 @@ public void start() {
|
|||||||
// job-init to be done completely here.
|
// job-init to be done completely here.
|
||||||
jobEventDispatcher.handle(initJobEvent);
|
jobEventDispatcher.handle(initJobEvent);
|
||||||
|
|
||||||
// send init to speculator. This won't yest start as dispatcher isn't
|
|
||||||
// started yet.
|
|
||||||
dispatcher.getEventHandler().handle(
|
|
||||||
new SpeculatorEvent(job.getID(), clock.getTime()));
|
|
||||||
|
|
||||||
// JobImpl's InitTransition is done (call above is synchronous), so the
|
// JobImpl's InitTransition is done (call above is synchronous), so the
|
||||||
// "uber-decision" (MR-1220) has been made. Query job and switch to
|
// "uber-decision" (MR-1220) has been made. Query job and switch to
|
||||||
@ -801,9 +799,15 @@ public void start() {
|
|||||||
// and container-launcher services/event-handlers).
|
// and container-launcher services/event-handlers).
|
||||||
|
|
||||||
if (job.isUber()) {
|
if (job.isUber()) {
|
||||||
|
speculatorEventDispatcher.disableSpeculation();
|
||||||
LOG.info("MRAppMaster uberizing job " + job.getID()
|
LOG.info("MRAppMaster uberizing job " + job.getID()
|
||||||
+ " in local container (\"uber-AM\").");
|
+ " in local container (\"uber-AM\") on node "
|
||||||
|
+ nmHost + ":" + nmPort + ".");
|
||||||
} else {
|
} else {
|
||||||
|
// send init to speculator only for non-uber jobs.
|
||||||
|
// This won't yet start as dispatcher isn't started yet.
|
||||||
|
dispatcher.getEventHandler().handle(
|
||||||
|
new SpeculatorEvent(job.getID(), clock.getTime()));
|
||||||
LOG.info("MRAppMaster launching normal, non-uberized, multi-container "
|
LOG.info("MRAppMaster launching normal, non-uberized, multi-container "
|
||||||
+ "job " + job.getID() + ".");
|
+ "job " + job.getID() + ".");
|
||||||
}
|
}
|
||||||
@ -865,17 +869,24 @@ public void handle(TaskAttemptEvent event) {
|
|||||||
private class SpeculatorEventDispatcher implements
|
private class SpeculatorEventDispatcher implements
|
||||||
EventHandler<SpeculatorEvent> {
|
EventHandler<SpeculatorEvent> {
|
||||||
private final Configuration conf;
|
private final Configuration conf;
|
||||||
|
private volatile boolean disabled;
|
||||||
public SpeculatorEventDispatcher(Configuration config) {
|
public SpeculatorEventDispatcher(Configuration config) {
|
||||||
this.conf = config;
|
this.conf = config;
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
public void handle(SpeculatorEvent event) {
|
public void handle(SpeculatorEvent event) {
|
||||||
if (conf.getBoolean(MRJobConfig.MAP_SPECULATIVE, false)
|
if (!disabled &&
|
||||||
|| conf.getBoolean(MRJobConfig.REDUCE_SPECULATIVE, false)) {
|
(conf.getBoolean(MRJobConfig.MAP_SPECULATIVE, false)
|
||||||
|
|| conf.getBoolean(MRJobConfig.REDUCE_SPECULATIVE, false))) {
|
||||||
// Speculator IS enabled, direct the event to there.
|
// Speculator IS enabled, direct the event to there.
|
||||||
speculator.handle(event);
|
speculator.handle(event);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void disableSpeculation() {
|
||||||
|
disabled = true;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void validateInputParam(String value, String param)
|
private static void validateInputParam(String value, String param)
|
||||||
|
@ -54,6 +54,7 @@
|
|||||||
import org.apache.hadoop.mapreduce.jobhistory.JobUnsuccessfulCompletionEvent;
|
import org.apache.hadoop.mapreduce.jobhistory.JobUnsuccessfulCompletionEvent;
|
||||||
import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;
|
import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;
|
||||||
import org.apache.hadoop.mapreduce.lib.chain.ChainReducer;
|
import org.apache.hadoop.mapreduce.lib.chain.ChainReducer;
|
||||||
|
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||||
import org.apache.hadoop.mapreduce.security.TokenCache;
|
import org.apache.hadoop.mapreduce.security.TokenCache;
|
||||||
import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier;
|
import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier;
|
||||||
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
|
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
|
||||||
@ -583,13 +584,13 @@ public JobReport getReport() {
|
|||||||
if (getState() == JobState.NEW) {
|
if (getState() == JobState.NEW) {
|
||||||
return MRBuilderUtils.newJobReport(jobId, jobName, username, state,
|
return MRBuilderUtils.newJobReport(jobId, jobName, username, state,
|
||||||
appSubmitTime, startTime, finishTime, setupProgress, 0.0f, 0.0f,
|
appSubmitTime, startTime, finishTime, setupProgress, 0.0f, 0.0f,
|
||||||
cleanupProgress, remoteJobConfFile.toString(), amInfos);
|
cleanupProgress, remoteJobConfFile.toString(), amInfos, isUber);
|
||||||
}
|
}
|
||||||
|
|
||||||
return MRBuilderUtils.newJobReport(jobId, jobName, username, state,
|
return MRBuilderUtils.newJobReport(jobId, jobName, username, state,
|
||||||
appSubmitTime, startTime, finishTime, setupProgress,
|
appSubmitTime, startTime, finishTime, setupProgress,
|
||||||
computeProgress(mapTasks), computeProgress(reduceTasks),
|
computeProgress(mapTasks), computeProgress(reduceTasks),
|
||||||
cleanupProgress, remoteJobConfFile.toString(), amInfos);
|
cleanupProgress, remoteJobConfFile.toString(), amInfos, isUber);
|
||||||
} finally {
|
} finally {
|
||||||
readLock.unlock();
|
readLock.unlock();
|
||||||
}
|
}
|
||||||
@ -812,6 +813,129 @@ public List<AMInfo> getAMInfos() {
|
|||||||
return amInfos;
|
return amInfos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decide whether job can be run in uber mode based on various criteria.
|
||||||
|
* @param dataInputLength Total length for all splits
|
||||||
|
*/
|
||||||
|
private void makeUberDecision(long dataInputLength) {
|
||||||
|
//FIXME: need new memory criterion for uber-decision (oops, too late here;
|
||||||
|
// until AM-resizing supported,
|
||||||
|
// must depend on job client to pass fat-slot needs)
|
||||||
|
// these are no longer "system" settings, necessarily; user may override
|
||||||
|
int sysMaxMaps = conf.getInt(MRJobConfig.JOB_UBERTASK_MAXMAPS, 9);
|
||||||
|
|
||||||
|
//FIXME: handling multiple reduces within a single AM does not seem to
|
||||||
|
//work.
|
||||||
|
// int sysMaxReduces =
|
||||||
|
// job.conf.getInt(MRJobConfig.JOB_UBERTASK_MAXREDUCES, 1);
|
||||||
|
int sysMaxReduces = 1;
|
||||||
|
|
||||||
|
long sysMaxBytes = conf.getLong(MRJobConfig.JOB_UBERTASK_MAXBYTES,
|
||||||
|
conf.getLong("dfs.block.size", 64*1024*1024)); //FIXME: this is
|
||||||
|
// wrong; get FS from [File?]InputFormat and default block size from that
|
||||||
|
|
||||||
|
long sysMemSizeForUberSlot =
|
||||||
|
conf.getInt(MRJobConfig.MR_AM_VMEM_MB,
|
||||||
|
MRJobConfig.DEFAULT_MR_AM_VMEM_MB);
|
||||||
|
|
||||||
|
boolean uberEnabled =
|
||||||
|
conf.getBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
|
||||||
|
boolean smallNumMapTasks = (numMapTasks <= sysMaxMaps);
|
||||||
|
boolean smallNumReduceTasks = (numReduceTasks <= sysMaxReduces);
|
||||||
|
boolean smallInput = (dataInputLength <= sysMaxBytes);
|
||||||
|
// ignoring overhead due to UberAM and statics as negligible here:
|
||||||
|
boolean smallMemory =
|
||||||
|
( (Math.max(conf.getLong(MRJobConfig.MAP_MEMORY_MB, 0),
|
||||||
|
conf.getLong(MRJobConfig.REDUCE_MEMORY_MB, 0))
|
||||||
|
<= sysMemSizeForUberSlot)
|
||||||
|
|| (sysMemSizeForUberSlot == JobConf.DISABLED_MEMORY_LIMIT));
|
||||||
|
boolean notChainJob = !isChainJob(conf);
|
||||||
|
|
||||||
|
// User has overall veto power over uberization, or user can modify
|
||||||
|
// limits (overriding system settings and potentially shooting
|
||||||
|
// themselves in the head). Note that ChainMapper/Reducer are
|
||||||
|
// fundamentally incompatible with MR-1220; they employ a blocking
|
||||||
|
// queue between the maps/reduces and thus require parallel execution,
|
||||||
|
// while "uber-AM" (MR AM + LocalContainerLauncher) loops over tasks
|
||||||
|
// and thus requires sequential execution.
|
||||||
|
isUber = uberEnabled && smallNumMapTasks && smallNumReduceTasks
|
||||||
|
&& smallInput && smallMemory && notChainJob;
|
||||||
|
|
||||||
|
if (isUber) {
|
||||||
|
LOG.info("Uberizing job " + jobId + ": " + numMapTasks + "m+"
|
||||||
|
+ numReduceTasks + "r tasks (" + dataInputLength
|
||||||
|
+ " input bytes) will run sequentially on single node.");
|
||||||
|
|
||||||
|
// make sure reduces are scheduled only after all map are completed
|
||||||
|
conf.setFloat(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART,
|
||||||
|
1.0f);
|
||||||
|
// uber-subtask attempts all get launched on same node; if one fails,
|
||||||
|
// probably should retry elsewhere, i.e., move entire uber-AM: ergo,
|
||||||
|
// limit attempts to 1 (or at most 2? probably not...)
|
||||||
|
conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 1);
|
||||||
|
conf.setInt(MRJobConfig.REDUCE_MAX_ATTEMPTS, 1);
|
||||||
|
|
||||||
|
// disable speculation
|
||||||
|
conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
|
||||||
|
conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
|
||||||
|
} else {
|
||||||
|
StringBuilder msg = new StringBuilder();
|
||||||
|
msg.append("Not uberizing ").append(jobId).append(" because:");
|
||||||
|
if (!uberEnabled)
|
||||||
|
msg.append(" not enabled;");
|
||||||
|
if (!smallNumMapTasks)
|
||||||
|
msg.append(" too many maps;");
|
||||||
|
if (!smallNumReduceTasks)
|
||||||
|
msg.append(" too many reduces;");
|
||||||
|
if (!smallInput)
|
||||||
|
msg.append(" too much input;");
|
||||||
|
if (!smallMemory)
|
||||||
|
msg.append(" too much RAM;");
|
||||||
|
if (!notChainJob)
|
||||||
|
msg.append(" chainjob");
|
||||||
|
LOG.info(msg.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ChainMapper and ChainReducer must execute in parallel, so they're not
|
||||||
|
* compatible with uberization/LocalContainerLauncher (100% sequential).
|
||||||
|
*/
|
||||||
|
private boolean isChainJob(Configuration conf) {
|
||||||
|
boolean isChainJob = false;
|
||||||
|
try {
|
||||||
|
String mapClassName = conf.get(MRJobConfig.MAP_CLASS_ATTR);
|
||||||
|
if (mapClassName != null) {
|
||||||
|
Class<?> mapClass = Class.forName(mapClassName);
|
||||||
|
if (ChainMapper.class.isAssignableFrom(mapClass))
|
||||||
|
isChainJob = true;
|
||||||
|
}
|
||||||
|
} catch (ClassNotFoundException cnfe) {
|
||||||
|
// don't care; assume it's not derived from ChainMapper
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
String reduceClassName = conf.get(MRJobConfig.REDUCE_CLASS_ATTR);
|
||||||
|
if (reduceClassName != null) {
|
||||||
|
Class<?> reduceClass = Class.forName(reduceClassName);
|
||||||
|
if (ChainReducer.class.isAssignableFrom(reduceClass))
|
||||||
|
isChainJob = true;
|
||||||
|
}
|
||||||
|
} catch (ClassNotFoundException cnfe) {
|
||||||
|
// don't care; assume it's not derived from ChainReducer
|
||||||
|
}
|
||||||
|
return isChainJob;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
private int getBlockSize() {
|
||||||
|
String inputClassName = conf.get(MRJobConfig.INPUT_FORMAT_CLASS_ATTR);
|
||||||
|
if (inputClassName != null) {
|
||||||
|
Class<?> inputClass - Class.forName(inputClassName);
|
||||||
|
if (FileInputFormat<K, V>)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
public static class InitTransition
|
public static class InitTransition
|
||||||
implements MultipleArcTransition<JobImpl, JobEvent, JobState> {
|
implements MultipleArcTransition<JobImpl, JobEvent, JobState> {
|
||||||
|
|
||||||
@ -863,81 +987,8 @@ public JobState transition(JobImpl job, JobEvent event) {
|
|||||||
inputLength += taskSplitMetaInfo[i].getInputDataLength();
|
inputLength += taskSplitMetaInfo[i].getInputDataLength();
|
||||||
}
|
}
|
||||||
|
|
||||||
//FIXME: need new memory criterion for uber-decision (oops, too late here;
|
job.makeUberDecision(inputLength);
|
||||||
// until AM-resizing supported, must depend on job client to pass fat-slot needs)
|
|
||||||
// these are no longer "system" settings, necessarily; user may override
|
|
||||||
int sysMaxMaps = job.conf.getInt(MRJobConfig.JOB_UBERTASK_MAXMAPS, 9);
|
|
||||||
int sysMaxReduces =
|
|
||||||
job.conf.getInt(MRJobConfig.JOB_UBERTASK_MAXREDUCES, 1);
|
|
||||||
long sysMaxBytes = job.conf.getLong(MRJobConfig.JOB_UBERTASK_MAXBYTES,
|
|
||||||
job.conf.getLong("dfs.block.size", 64*1024*1024)); //FIXME: this is
|
|
||||||
// wrong; get FS from [File?]InputFormat and default block size from that
|
|
||||||
//long sysMemSizeForUberSlot = JobTracker.getMemSizeForReduceSlot();
|
|
||||||
// FIXME [could use default AM-container memory size...]
|
|
||||||
|
|
||||||
boolean uberEnabled =
|
|
||||||
job.conf.getBoolean(MRJobConfig.JOB_UBERTASK_ENABLE, false);
|
|
||||||
boolean smallNumMapTasks = (job.numMapTasks <= sysMaxMaps);
|
|
||||||
boolean smallNumReduceTasks = (job.numReduceTasks <= sysMaxReduces);
|
|
||||||
boolean smallInput = (inputLength <= sysMaxBytes);
|
|
||||||
boolean smallMemory = true; //FIXME (see above)
|
|
||||||
// ignoring overhead due to UberTask and statics as negligible here:
|
|
||||||
// FIXME && (Math.max(memoryPerMap, memoryPerReduce) <= sysMemSizeForUberSlot
|
|
||||||
// || sysMemSizeForUberSlot == JobConf.DISABLED_MEMORY_LIMIT)
|
|
||||||
boolean notChainJob = !isChainJob(job.conf);
|
|
||||||
|
|
||||||
// User has overall veto power over uberization, or user can modify
|
|
||||||
// limits (overriding system settings and potentially shooting
|
|
||||||
// themselves in the head). Note that ChainMapper/Reducer are
|
|
||||||
// fundamentally incompatible with MR-1220; they employ a blocking
|
|
||||||
|
|
||||||
// User has overall veto power over uberization, or user can modify
|
|
||||||
// limits (overriding system settings and potentially shooting
|
|
||||||
// themselves in the head). Note that ChainMapper/Reducer are
|
|
||||||
// fundamentally incompatible with MR-1220; they employ a blocking
|
|
||||||
// queue between the maps/reduces and thus require parallel execution,
|
|
||||||
// while "uber-AM" (MR AM + LocalContainerLauncher) loops over tasks
|
|
||||||
// and thus requires sequential execution.
|
|
||||||
job.isUber = uberEnabled && smallNumMapTasks && smallNumReduceTasks
|
|
||||||
&& smallInput && smallMemory && notChainJob;
|
|
||||||
|
|
||||||
if (job.isUber) {
|
|
||||||
LOG.info("Uberizing job " + job.jobId + ": " + job.numMapTasks + "m+"
|
|
||||||
+ job.numReduceTasks + "r tasks (" + inputLength
|
|
||||||
+ " input bytes) will run sequentially on single node.");
|
|
||||||
//TODO: also note which node?
|
|
||||||
|
|
||||||
// make sure reduces are scheduled only after all map are completed
|
|
||||||
job.conf.setFloat(MRJobConfig.COMPLETED_MAPS_FOR_REDUCE_SLOWSTART,
|
|
||||||
1.0f);
|
|
||||||
// uber-subtask attempts all get launched on same node; if one fails,
|
|
||||||
// probably should retry elsewhere, i.e., move entire uber-AM: ergo,
|
|
||||||
// limit attempts to 1 (or at most 2? probably not...)
|
|
||||||
job.conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 1);
|
|
||||||
job.conf.setInt(MRJobConfig.REDUCE_MAX_ATTEMPTS, 1);
|
|
||||||
|
|
||||||
// disable speculation: makes no sense to speculate an entire job
|
|
||||||
//canSpeculateMaps = canSpeculateReduces = false; // [TODO: in old
|
|
||||||
//version, ultimately was from conf.getMapSpeculativeExecution(),
|
|
||||||
//conf.getReduceSpeculativeExecution()]
|
|
||||||
} else {
|
|
||||||
StringBuilder msg = new StringBuilder();
|
|
||||||
msg.append("Not uberizing ").append(job.jobId).append(" because:");
|
|
||||||
if (!uberEnabled)
|
|
||||||
msg.append(" not enabled;");
|
|
||||||
if (!smallNumMapTasks)
|
|
||||||
msg.append(" too many maps;");
|
|
||||||
if (!smallNumReduceTasks)
|
|
||||||
msg.append(" too many reduces;");
|
|
||||||
if (!smallInput)
|
|
||||||
msg.append(" too much input;");
|
|
||||||
if (!smallMemory)
|
|
||||||
msg.append(" too much RAM;");
|
|
||||||
if (!notChainJob)
|
|
||||||
msg.append(" chainjob");
|
|
||||||
LOG.info(msg.toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
job.taskAttemptCompletionEvents =
|
job.taskAttemptCompletionEvents =
|
||||||
new ArrayList<TaskAttemptCompletionEvent>(
|
new ArrayList<TaskAttemptCompletionEvent>(
|
||||||
job.numMapTasks + job.numReduceTasks + 10);
|
job.numMapTasks + job.numReduceTasks + 10);
|
||||||
@ -1008,35 +1059,6 @@ protected void setup(JobImpl job) throws IOException {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* ChainMapper and ChainReducer must execute in parallel, so they're not
|
|
||||||
* compatible with uberization/LocalContainerLauncher (100% sequential).
|
|
||||||
*/
|
|
||||||
boolean isChainJob(Configuration conf) {
|
|
||||||
boolean isChainJob = false;
|
|
||||||
try {
|
|
||||||
String mapClassName = conf.get(MRJobConfig.MAP_CLASS_ATTR);
|
|
||||||
if (mapClassName != null) {
|
|
||||||
Class<?> mapClass = Class.forName(mapClassName);
|
|
||||||
if (ChainMapper.class.isAssignableFrom(mapClass))
|
|
||||||
isChainJob = true;
|
|
||||||
}
|
|
||||||
} catch (ClassNotFoundException cnfe) {
|
|
||||||
// don't care; assume it's not derived from ChainMapper
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
String reduceClassName = conf.get(MRJobConfig.REDUCE_CLASS_ATTR);
|
|
||||||
if (reduceClassName != null) {
|
|
||||||
Class<?> reduceClass = Class.forName(reduceClassName);
|
|
||||||
if (ChainReducer.class.isAssignableFrom(reduceClass))
|
|
||||||
isChainJob = true;
|
|
||||||
}
|
|
||||||
} catch (ClassNotFoundException cnfe) {
|
|
||||||
// don't care; assume it's not derived from ChainReducer
|
|
||||||
}
|
|
||||||
return isChainJob;
|
|
||||||
}
|
|
||||||
|
|
||||||
private void createMapTasks(JobImpl job, long inputLength,
|
private void createMapTasks(JobImpl job, long inputLength,
|
||||||
TaskSplitMetaInfo[] splits) {
|
TaskSplitMetaInfo[] splits) {
|
||||||
for (int i=0; i < job.numMapTasks; ++i) {
|
for (int i=0; i < job.numMapTasks; ++i) {
|
||||||
|
@ -60,8 +60,8 @@ public class LocalContainerAllocator extends RMCommunicator
|
|||||||
private static final Log LOG =
|
private static final Log LOG =
|
||||||
LogFactory.getLog(LocalContainerAllocator.class);
|
LogFactory.getLog(LocalContainerAllocator.class);
|
||||||
|
|
||||||
|
@SuppressWarnings("rawtypes")
|
||||||
private final EventHandler eventHandler;
|
private final EventHandler eventHandler;
|
||||||
// private final ApplicationId appID;
|
|
||||||
private AtomicInteger containerCount = new AtomicInteger();
|
private AtomicInteger containerCount = new AtomicInteger();
|
||||||
private long retryInterval;
|
private long retryInterval;
|
||||||
private long retrystartTime;
|
private long retrystartTime;
|
||||||
@ -73,8 +73,6 @@ public LocalContainerAllocator(ClientService clientService,
|
|||||||
AppContext context) {
|
AppContext context) {
|
||||||
super(clientService, context);
|
super(clientService, context);
|
||||||
this.eventHandler = context.getEventHandler();
|
this.eventHandler = context.getEventHandler();
|
||||||
// this.appID = context.getApplicationID();
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -88,6 +86,7 @@ public void init(Configuration conf) {
|
|||||||
retrystartTime = System.currentTimeMillis();
|
retrystartTime = System.currentTimeMillis();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
@Override
|
@Override
|
||||||
protected synchronized void heartbeat() throws Exception {
|
protected synchronized void heartbeat() throws Exception {
|
||||||
AllocateRequest allocateRequest = BuilderUtils.newAllocateRequest(
|
AllocateRequest allocateRequest = BuilderUtils.newAllocateRequest(
|
||||||
@ -124,6 +123,7 @@ protected synchronized void heartbeat() throws Exception {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
@Override
|
@Override
|
||||||
public void handle(ContainerAllocatorEvent event) {
|
public void handle(ContainerAllocatorEvent event) {
|
||||||
if (event.getType() == ContainerAllocator.EventType.CONTAINER_REQ) {
|
if (event.getType() == ContainerAllocator.EventType.CONTAINER_REQ) {
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.security.PrivilegedAction;
|
import java.security.PrivilegedAction;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
@ -65,7 +66,7 @@ public abstract class RMCommunicator extends AbstractService {
|
|||||||
private int rmPollInterval;//millis
|
private int rmPollInterval;//millis
|
||||||
protected ApplicationId applicationId;
|
protected ApplicationId applicationId;
|
||||||
protected ApplicationAttemptId applicationAttemptId;
|
protected ApplicationAttemptId applicationAttemptId;
|
||||||
private volatile boolean stopped;
|
private AtomicBoolean stopped;
|
||||||
protected Thread allocatorThread;
|
protected Thread allocatorThread;
|
||||||
protected EventHandler eventHandler;
|
protected EventHandler eventHandler;
|
||||||
protected AMRMProtocol scheduler;
|
protected AMRMProtocol scheduler;
|
||||||
@ -88,6 +89,7 @@ public RMCommunicator(ClientService clientService, AppContext context) {
|
|||||||
this.eventHandler = context.getEventHandler();
|
this.eventHandler = context.getEventHandler();
|
||||||
this.applicationId = context.getApplicationID();
|
this.applicationId = context.getApplicationID();
|
||||||
this.applicationAttemptId = context.getApplicationAttemptId();
|
this.applicationAttemptId = context.getApplicationAttemptId();
|
||||||
|
this.stopped = new AtomicBoolean(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -213,7 +215,10 @@ protected Resource getMaxContainerCapability() {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void stop() {
|
public void stop() {
|
||||||
stopped = true;
|
if (stopped.getAndSet(true)) {
|
||||||
|
// return if already stopped
|
||||||
|
return;
|
||||||
|
}
|
||||||
allocatorThread.interrupt();
|
allocatorThread.interrupt();
|
||||||
try {
|
try {
|
||||||
allocatorThread.join();
|
allocatorThread.join();
|
||||||
@ -228,7 +233,7 @@ protected void startAllocatorThread() {
|
|||||||
allocatorThread = new Thread(new Runnable() {
|
allocatorThread = new Thread(new Runnable() {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
while (!stopped && !Thread.currentThread().isInterrupted()) {
|
while (!stopped.get() && !Thread.currentThread().isInterrupted()) {
|
||||||
try {
|
try {
|
||||||
Thread.sleep(rmPollInterval);
|
Thread.sleep(rmPollInterval);
|
||||||
try {
|
try {
|
||||||
|
@ -183,6 +183,7 @@ private void verifyJobReport(JobReport jr) {
|
|||||||
Assert.assertEquals(1, amInfo.getContainerId().getApplicationAttemptId()
|
Assert.assertEquals(1, amInfo.getContainerId().getApplicationAttemptId()
|
||||||
.getAttemptId());
|
.getAttemptId());
|
||||||
Assert.assertTrue(amInfo.getStartTime() > 0);
|
Assert.assertTrue(amInfo.getStartTime() > 0);
|
||||||
|
Assert.assertEquals(false, jr.isUber());
|
||||||
}
|
}
|
||||||
|
|
||||||
private void verifyTaskAttemptReport(TaskAttemptReport tar) {
|
private void verifyTaskAttemptReport(TaskAttemptReport tar) {
|
||||||
|
@ -118,7 +118,7 @@ public void testSimple() throws Exception {
|
|||||||
Job mockJob = mock(Job.class);
|
Job mockJob = mock(Job.class);
|
||||||
when(mockJob.getReport()).thenReturn(
|
when(mockJob.getReport()).thenReturn(
|
||||||
MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0,
|
MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0,
|
||||||
0, 0, 0, 0, 0, 0, "jobfile", null));
|
0, 0, 0, 0, 0, 0, "jobfile", null, false));
|
||||||
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf,
|
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf,
|
||||||
appAttemptId, mockJob);
|
appAttemptId, mockJob);
|
||||||
|
|
||||||
@ -195,7 +195,7 @@ public void testResource() throws Exception {
|
|||||||
Job mockJob = mock(Job.class);
|
Job mockJob = mock(Job.class);
|
||||||
when(mockJob.getReport()).thenReturn(
|
when(mockJob.getReport()).thenReturn(
|
||||||
MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0,
|
MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0,
|
||||||
0, 0, 0, 0, 0, 0, "jobfile", null));
|
0, 0, 0, 0, 0, 0, "jobfile", null, false));
|
||||||
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf,
|
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf,
|
||||||
appAttemptId, mockJob);
|
appAttemptId, mockJob);
|
||||||
|
|
||||||
@ -261,7 +261,7 @@ public void testMapReduceScheduling() throws Exception {
|
|||||||
Job mockJob = mock(Job.class);
|
Job mockJob = mock(Job.class);
|
||||||
when(mockJob.getReport()).thenReturn(
|
when(mockJob.getReport()).thenReturn(
|
||||||
MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0,
|
MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0,
|
||||||
0, 0, 0, 0, 0, 0, "jobfile", null));
|
0, 0, 0, 0, 0, 0, "jobfile", null, false));
|
||||||
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf,
|
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf,
|
||||||
appAttemptId, mockJob);
|
appAttemptId, mockJob);
|
||||||
|
|
||||||
@ -375,7 +375,7 @@ void setProgress(float setupProgress, float mapProgress,
|
|||||||
public JobReport getReport() {
|
public JobReport getReport() {
|
||||||
return MRBuilderUtils.newJobReport(this.jobId, "job", "user",
|
return MRBuilderUtils.newJobReport(this.jobId, "job", "user",
|
||||||
JobState.RUNNING, 0, 0, 0, this.setupProgress, this.mapProgress,
|
JobState.RUNNING, 0, 0, 0, this.setupProgress, this.mapProgress,
|
||||||
this.reduceProgress, this.cleanupProgress, "jobfile", null);
|
this.reduceProgress, this.cleanupProgress, "jobfile", null, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -511,7 +511,7 @@ public void testBlackListedNodes() throws Exception {
|
|||||||
Job mockJob = mock(Job.class);
|
Job mockJob = mock(Job.class);
|
||||||
when(mockJob.getReport()).thenReturn(
|
when(mockJob.getReport()).thenReturn(
|
||||||
MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0,
|
MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0,
|
||||||
0, 0, 0, 0, 0, 0, "jobfile", null));
|
0, 0, 0, 0, 0, 0, "jobfile", null, false));
|
||||||
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf,
|
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf,
|
||||||
appAttemptId, mockJob);
|
appAttemptId, mockJob);
|
||||||
|
|
||||||
@ -610,7 +610,7 @@ public void testBlackListedNodesWithSchedulingToThatNode() throws Exception {
|
|||||||
Job mockJob = mock(Job.class);
|
Job mockJob = mock(Job.class);
|
||||||
when(mockJob.getReport()).thenReturn(
|
when(mockJob.getReport()).thenReturn(
|
||||||
MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0,
|
MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, 0,
|
||||||
0, 0, 0, 0, 0, 0, "jobfile", null));
|
0, 0, 0, 0, 0, 0, "jobfile", null, false));
|
||||||
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf,
|
MyContainerAllocator allocator = new MyContainerAllocator(rm, conf,
|
||||||
appAttemptId, mockJob);
|
appAttemptId, mockJob);
|
||||||
|
|
||||||
|
@ -288,7 +288,7 @@ public static JobStatus fromYarn(JobReport jobreport, String trackingUrl) {
|
|||||||
.getMapProgress(), jobreport.getReduceProgress(), jobreport
|
.getMapProgress(), jobreport.getReduceProgress(), jobreport
|
||||||
.getCleanupProgress(), fromYarn(jobreport.getJobState()),
|
.getCleanupProgress(), fromYarn(jobreport.getJobState()),
|
||||||
jobPriority, jobreport.getUser(), jobreport.getJobName(), jobreport
|
jobPriority, jobreport.getUser(), jobreport.getJobName(), jobreport
|
||||||
.getJobFile(), trackingUrl);
|
.getJobFile(), trackingUrl, jobreport.isUber());
|
||||||
jobStatus.setFailureInfo(jobreport.getDiagnostics());
|
jobStatus.setFailureInfo(jobreport.getDiagnostics());
|
||||||
return jobStatus;
|
return jobStatus;
|
||||||
}
|
}
|
||||||
@ -421,7 +421,7 @@ public static JobStatus fromYarn(ApplicationReport application,
|
|||||||
TypeConverter.fromYarn(application.getYarnApplicationState()),
|
TypeConverter.fromYarn(application.getYarnApplicationState()),
|
||||||
org.apache.hadoop.mapreduce.JobPriority.NORMAL,
|
org.apache.hadoop.mapreduce.JobPriority.NORMAL,
|
||||||
application.getUser(), application.getName(),
|
application.getUser(), application.getName(),
|
||||||
application.getQueue(), jobFile, trackingUrl
|
application.getQueue(), jobFile, trackingUrl, false
|
||||||
);
|
);
|
||||||
jobStatus.setSchedulingInfo(trackingUrl); // Set AM tracking url
|
jobStatus.setSchedulingInfo(trackingUrl); // Set AM tracking url
|
||||||
jobStatus.setStartTime(application.getStartTime());
|
jobStatus.setStartTime(application.getStartTime());
|
||||||
|
@ -36,6 +36,7 @@ public interface JobReport {
|
|||||||
public abstract String getDiagnostics();
|
public abstract String getDiagnostics();
|
||||||
public abstract String getJobFile();
|
public abstract String getJobFile();
|
||||||
public abstract List<AMInfo> getAMInfos();
|
public abstract List<AMInfo> getAMInfos();
|
||||||
|
public abstract boolean isUber();
|
||||||
|
|
||||||
public abstract void setJobId(JobId jobId);
|
public abstract void setJobId(JobId jobId);
|
||||||
public abstract void setJobState(JobState jobState);
|
public abstract void setJobState(JobState jobState);
|
||||||
@ -52,4 +53,5 @@ public interface JobReport {
|
|||||||
public abstract void setDiagnostics(String diagnostics);
|
public abstract void setDiagnostics(String diagnostics);
|
||||||
public abstract void setJobFile(String jobFile);
|
public abstract void setJobFile(String jobFile);
|
||||||
public abstract void setAMInfos(List<AMInfo> amInfos);
|
public abstract void setAMInfos(List<AMInfo> amInfos);
|
||||||
|
public abstract void setIsUber(boolean isUber);
|
||||||
}
|
}
|
||||||
|
@ -332,4 +332,16 @@ private JobStateProto convertToProtoFormat(JobState e) {
|
|||||||
private JobState convertFromProtoFormat(JobStateProto e) {
|
private JobState convertFromProtoFormat(JobStateProto e) {
|
||||||
return MRProtoUtils.convertFromProtoFormat(e);
|
return MRProtoUtils.convertFromProtoFormat(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized boolean isUber() {
|
||||||
|
JobReportProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
|
return p.getIsUber();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void setIsUber(boolean isUber) {
|
||||||
|
maybeInitBuilder();
|
||||||
|
builder.setIsUber(isUber);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -60,7 +60,8 @@ public static TaskAttemptId newTaskAttemptId(TaskId taskId, int attemptId) {
|
|||||||
public static JobReport newJobReport(JobId jobId, String jobName,
|
public static JobReport newJobReport(JobId jobId, String jobName,
|
||||||
String userName, JobState state, long submitTime, long startTime, long finishTime,
|
String userName, JobState state, long submitTime, long startTime, long finishTime,
|
||||||
float setupProgress, float mapProgress, float reduceProgress,
|
float setupProgress, float mapProgress, float reduceProgress,
|
||||||
float cleanupProgress, String jobFile, List<AMInfo> amInfos) {
|
float cleanupProgress, String jobFile, List<AMInfo> amInfos,
|
||||||
|
boolean isUber) {
|
||||||
JobReport report = Records.newRecord(JobReport.class);
|
JobReport report = Records.newRecord(JobReport.class);
|
||||||
report.setJobId(jobId);
|
report.setJobId(jobId);
|
||||||
report.setJobName(jobName);
|
report.setJobName(jobName);
|
||||||
@ -75,6 +76,7 @@ public static JobReport newJobReport(JobId jobId, String jobName,
|
|||||||
report.setReduceProgress(reduceProgress);
|
report.setReduceProgress(reduceProgress);
|
||||||
report.setJobFile(jobFile);
|
report.setJobFile(jobFile);
|
||||||
report.setAMInfos(amInfos);
|
report.setAMInfos(amInfos);
|
||||||
|
report.setIsUber(isUber);
|
||||||
return report;
|
return report;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -152,6 +152,7 @@ message JobReportProto {
|
|||||||
optional string jobFile = 13;
|
optional string jobFile = 13;
|
||||||
repeated AMInfoProto am_infos = 14;
|
repeated AMInfoProto am_infos = 14;
|
||||||
optional int64 submit_time = 15;
|
optional int64 submit_time = 15;
|
||||||
|
optional bool is_uber = 16 [default = false];
|
||||||
}
|
}
|
||||||
|
|
||||||
message AMInfoProto {
|
message AMInfoProto {
|
||||||
|
@ -97,7 +97,7 @@ public JobStatus(JobID jobid, float mapProgress, float reduceProgress,
|
|||||||
String user, String jobName,
|
String user, String jobName,
|
||||||
String jobFile, String trackingUrl) {
|
String jobFile, String trackingUrl) {
|
||||||
this(jobid, mapProgress, reduceProgress, cleanupProgress, runState,
|
this(jobid, mapProgress, reduceProgress, cleanupProgress, runState,
|
||||||
JobPriority.NORMAL, user, jobName, jobFile, trackingUrl);
|
JobPriority.NORMAL, user, jobName, jobFile, trackingUrl);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -135,7 +135,8 @@ public JobStatus(JobID jobid, float mapProgress, float reduceProgress,
|
|||||||
String user, String jobName, String jobFile,
|
String user, String jobName, String jobFile,
|
||||||
String trackingUrl) {
|
String trackingUrl) {
|
||||||
this(jobid, 0.0f, mapProgress, reduceProgress,
|
this(jobid, 0.0f, mapProgress, reduceProgress,
|
||||||
cleanupProgress, runState, jp, user, jobName, jobFile, trackingUrl);
|
cleanupProgress, runState, jp, user, jobName, jobFile,
|
||||||
|
trackingUrl);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -157,10 +158,57 @@ public JobStatus(JobID jobid, float setupProgress, float mapProgress,
|
|||||||
int runState, JobPriority jp, String user, String jobName,
|
int runState, JobPriority jp, String user, String jobName,
|
||||||
String jobFile, String trackingUrl) {
|
String jobFile, String trackingUrl) {
|
||||||
this(jobid, setupProgress, mapProgress, reduceProgress, cleanupProgress,
|
this(jobid, setupProgress, mapProgress, reduceProgress, cleanupProgress,
|
||||||
runState, jp,
|
runState, jp, user, jobName, "default", jobFile, trackingUrl);
|
||||||
user, jobName, "default", jobFile, trackingUrl);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a job status object for a given jobid.
|
||||||
|
* @param jobid The jobid of the job
|
||||||
|
* @param setupProgress The progress made on the setup
|
||||||
|
* @param mapProgress The progress made on the maps
|
||||||
|
* @param reduceProgress The progress made on the reduces
|
||||||
|
* @param cleanupProgress The progress made on the cleanup
|
||||||
|
* @param runState The current state of the job
|
||||||
|
* @param jp Priority of the job.
|
||||||
|
* @param user userid of the person who submitted the job.
|
||||||
|
* @param jobName user-specified job name.
|
||||||
|
* @param jobFile job configuration file.
|
||||||
|
* @param trackingUrl link to the web-ui for details of the job.
|
||||||
|
* @param isUber Whether job running in uber mode
|
||||||
|
*/
|
||||||
|
public JobStatus(JobID jobid, float setupProgress, float mapProgress,
|
||||||
|
float reduceProgress, float cleanupProgress,
|
||||||
|
int runState, JobPriority jp, String user, String jobName,
|
||||||
|
String jobFile, String trackingUrl, boolean isUber) {
|
||||||
|
this(jobid, setupProgress, mapProgress, reduceProgress, cleanupProgress,
|
||||||
|
runState, jp, user, jobName, "default", jobFile, trackingUrl, isUber);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a job status object for a given jobid.
|
||||||
|
* @param jobid The jobid of the job
|
||||||
|
* @param setupProgress The progress made on the setup
|
||||||
|
* @param mapProgress The progress made on the maps
|
||||||
|
* @param reduceProgress The progress made on the reduces
|
||||||
|
* @param cleanupProgress The progress made on the cleanup
|
||||||
|
* @param runState The current state of the job
|
||||||
|
* @param jp Priority of the job.
|
||||||
|
* @param user userid of the person who submitted the job.
|
||||||
|
* @param jobName user-specified job name.
|
||||||
|
* @param queue job queue name.
|
||||||
|
* @param jobFile job configuration file.
|
||||||
|
* @param trackingUrl link to the web-ui for details of the job.
|
||||||
|
*/
|
||||||
|
public JobStatus(JobID jobid, float setupProgress, float mapProgress,
|
||||||
|
float reduceProgress, float cleanupProgress,
|
||||||
|
int runState, JobPriority jp,
|
||||||
|
String user, String jobName, String queue,
|
||||||
|
String jobFile, String trackingUrl) {
|
||||||
|
this(jobid, setupProgress, mapProgress, reduceProgress, cleanupProgress,
|
||||||
|
runState, jp,
|
||||||
|
user, jobName, queue, jobFile, trackingUrl, false);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a job status object for a given jobid.
|
* Create a job status object for a given jobid.
|
||||||
* @param jobid The jobid of the job
|
* @param jobid The jobid of the job
|
||||||
@ -175,25 +223,25 @@ public JobStatus(JobID jobid, float setupProgress, float mapProgress,
|
|||||||
* @param queue job queue name.
|
* @param queue job queue name.
|
||||||
* @param jobFile job configuration file.
|
* @param jobFile job configuration file.
|
||||||
* @param trackingUrl link to the web-ui for details of the job.
|
* @param trackingUrl link to the web-ui for details of the job.
|
||||||
|
* @param isUber Whether job running in uber mode
|
||||||
*/
|
*/
|
||||||
public JobStatus(JobID jobid, float setupProgress, float mapProgress,
|
public JobStatus(JobID jobid, float setupProgress, float mapProgress,
|
||||||
float reduceProgress, float cleanupProgress,
|
float reduceProgress, float cleanupProgress,
|
||||||
int runState, JobPriority jp,
|
int runState, JobPriority jp,
|
||||||
String user, String jobName, String queue,
|
String user, String jobName, String queue,
|
||||||
String jobFile, String trackingUrl) {
|
String jobFile, String trackingUrl, boolean isUber) {
|
||||||
super(jobid, setupProgress, mapProgress, reduceProgress, cleanupProgress,
|
super(jobid, setupProgress, mapProgress, reduceProgress, cleanupProgress,
|
||||||
getEnum(runState), org.apache.hadoop.mapreduce.JobPriority.valueOf(jp.name()),
|
getEnum(runState), org.apache.hadoop.mapreduce.JobPriority.valueOf(jp.name()),
|
||||||
user, jobName, queue, jobFile, trackingUrl);
|
user, jobName, queue, jobFile, trackingUrl, isUber);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static JobStatus downgrade(org.apache.hadoop.mapreduce.JobStatus stat){
|
public static JobStatus downgrade(org.apache.hadoop.mapreduce.JobStatus stat){
|
||||||
JobStatus old = new JobStatus(JobID.downgrade(stat.getJobID()),
|
JobStatus old = new JobStatus(JobID.downgrade(stat.getJobID()),
|
||||||
stat.getSetupProgress(), stat.getMapProgress(), stat.getReduceProgress(),
|
stat.getSetupProgress(), stat.getMapProgress(), stat.getReduceProgress(),
|
||||||
stat.getCleanupProgress(), stat.getState().getValue(),
|
stat.getCleanupProgress(), stat.getState().getValue(),
|
||||||
JobPriority.valueOf(stat.getPriority().name()),
|
JobPriority.valueOf(stat.getPriority().name()),
|
||||||
stat.getUsername(), stat.getJobName(), stat.getJobFile(),
|
stat.getUsername(), stat.getJobName(), stat.getJobFile(),
|
||||||
stat.getTrackingUrl());
|
stat.getTrackingUrl(), stat.isUber());
|
||||||
old.setStartTime(stat.getStartTime());
|
old.setStartTime(stat.getStartTime());
|
||||||
old.setFinishTime(stat.getFinishTime());
|
old.setFinishTime(stat.getFinishTime());
|
||||||
old.setSchedulingInfo(stat.getSchedulingInfo());
|
old.setSchedulingInfo(stat.getSchedulingInfo());
|
||||||
|
@ -467,6 +467,7 @@ public String toString() {
|
|||||||
sb.append("Job File: ").append(status.getJobFile()).append("\n");
|
sb.append("Job File: ").append(status.getJobFile()).append("\n");
|
||||||
sb.append("Job Tracking URL : ").append(status.getTrackingUrl());
|
sb.append("Job Tracking URL : ").append(status.getTrackingUrl());
|
||||||
sb.append("\n");
|
sb.append("\n");
|
||||||
|
sb.append("Uber job : ").append(status.isUber()).append("\n");
|
||||||
sb.append("map() completion: ");
|
sb.append("map() completion: ");
|
||||||
sb.append(status.getMapProgress()).append("\n");
|
sb.append(status.getMapProgress()).append("\n");
|
||||||
sb.append("reduce() completion: ");
|
sb.append("reduce() completion: ");
|
||||||
@ -1268,12 +1269,20 @@ public boolean monitorAndPrintJob()
|
|||||||
Job.getProgressPollInterval(clientConf);
|
Job.getProgressPollInterval(clientConf);
|
||||||
/* make sure to report full progress after the job is done */
|
/* make sure to report full progress after the job is done */
|
||||||
boolean reportedAfterCompletion = false;
|
boolean reportedAfterCompletion = false;
|
||||||
|
boolean reportedUberMode = false;
|
||||||
while (!isComplete() || !reportedAfterCompletion) {
|
while (!isComplete() || !reportedAfterCompletion) {
|
||||||
if (isComplete()) {
|
if (isComplete()) {
|
||||||
reportedAfterCompletion = true;
|
reportedAfterCompletion = true;
|
||||||
} else {
|
} else {
|
||||||
Thread.sleep(progMonitorPollIntervalMillis);
|
Thread.sleep(progMonitorPollIntervalMillis);
|
||||||
}
|
}
|
||||||
|
if (status.getState() == JobStatus.State.PREP) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!reportedUberMode) {
|
||||||
|
reportedUberMode = true;
|
||||||
|
LOG.info("Job " + jobId + " running in uber mode : " + isUber());
|
||||||
|
}
|
||||||
String report =
|
String report =
|
||||||
(" map " + StringUtils.formatPercent(mapProgress(), 0)+
|
(" map " + StringUtils.formatPercent(mapProgress(), 0)+
|
||||||
" reduce " +
|
" reduce " +
|
||||||
@ -1497,4 +1506,10 @@ public static void setTaskOutputFilter(Configuration conf,
|
|||||||
conf.set(Job.OUTPUT_FILTER, newValue.toString());
|
conf.set(Job.OUTPUT_FILTER, newValue.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isUber() throws IOException, InterruptedException {
|
||||||
|
ensureState(JobState.RUNNING);
|
||||||
|
updateStatus();
|
||||||
|
return status.isUber();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -97,7 +97,7 @@ public int getValue() {
|
|||||||
private int usedMem;
|
private int usedMem;
|
||||||
private int reservedMem;
|
private int reservedMem;
|
||||||
private int neededMem;
|
private int neededMem;
|
||||||
|
private boolean isUber;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*/
|
*/
|
||||||
@ -115,17 +115,17 @@ public JobStatus() {
|
|||||||
* @param jp Priority of the job.
|
* @param jp Priority of the job.
|
||||||
* @param user userid of the person who submitted the job.
|
* @param user userid of the person who submitted the job.
|
||||||
* @param jobName user-specified job name.
|
* @param jobName user-specified job name.
|
||||||
* @param jobFile job configuration file.
|
* @param jobFile job configuration file.
|
||||||
* @param trackingUrl link to the web-ui for details of the job.
|
* @param trackingUrl link to the web-ui for details of the job.
|
||||||
*/
|
*/
|
||||||
public JobStatus(JobID jobid, float setupProgress, float mapProgress,
|
public JobStatus(JobID jobid, float setupProgress, float mapProgress,
|
||||||
float reduceProgress, float cleanupProgress,
|
float reduceProgress, float cleanupProgress,
|
||||||
State runState, JobPriority jp, String user, String jobName,
|
State runState, JobPriority jp, String user, String jobName,
|
||||||
String jobFile, String trackingUrl) {
|
String jobFile, String trackingUrl) {
|
||||||
this(jobid, setupProgress, mapProgress, reduceProgress, cleanupProgress,
|
this(jobid, setupProgress, mapProgress, reduceProgress, cleanupProgress,
|
||||||
runState, jp, user, jobName, "default", jobFile, trackingUrl);
|
runState, jp, user, jobName, "default", jobFile, trackingUrl, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a job status object for a given jobid.
|
* Create a job status object for a given jobid.
|
||||||
* @param jobid The jobid of the job
|
* @param jobid The jobid of the job
|
||||||
@ -138,14 +138,39 @@ public JobStatus(JobID jobid, float setupProgress, float mapProgress,
|
|||||||
* @param user userid of the person who submitted the job.
|
* @param user userid of the person who submitted the job.
|
||||||
* @param jobName user-specified job name.
|
* @param jobName user-specified job name.
|
||||||
* @param queue queue name
|
* @param queue queue name
|
||||||
* @param jobFile job configuration file.
|
* @param jobFile job configuration file.
|
||||||
* @param trackingUrl link to the web-ui for details of the job.
|
* @param trackingUrl link to the web-ui for details of the job.
|
||||||
*/
|
*/
|
||||||
public JobStatus(JobID jobid, float setupProgress, float mapProgress,
|
public JobStatus(JobID jobid, float setupProgress, float mapProgress,
|
||||||
float reduceProgress, float cleanupProgress,
|
float reduceProgress, float cleanupProgress,
|
||||||
State runState, JobPriority jp,
|
State runState, JobPriority jp,
|
||||||
String user, String jobName, String queue,
|
String user, String jobName, String queue,
|
||||||
String jobFile, String trackingUrl) {
|
String jobFile, String trackingUrl) {
|
||||||
|
this(jobid, setupProgress, mapProgress, reduceProgress, cleanupProgress,
|
||||||
|
runState, jp, user, jobName, queue, jobFile, trackingUrl, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a job status object for a given jobid.
|
||||||
|
* @param jobid The jobid of the job
|
||||||
|
* @param setupProgress The progress made on the setup
|
||||||
|
* @param mapProgress The progress made on the maps
|
||||||
|
* @param reduceProgress The progress made on the reduces
|
||||||
|
* @param cleanupProgress The progress made on the cleanup
|
||||||
|
* @param runState The current state of the job
|
||||||
|
* @param jp Priority of the job.
|
||||||
|
* @param user userid of the person who submitted the job.
|
||||||
|
* @param jobName user-specified job name.
|
||||||
|
* @param queue queue name
|
||||||
|
* @param jobFile job configuration file.
|
||||||
|
* @param trackingUrl link to the web-ui for details of the job.
|
||||||
|
* @param isUber Whether job running in uber mode
|
||||||
|
*/
|
||||||
|
public JobStatus(JobID jobid, float setupProgress, float mapProgress,
|
||||||
|
float reduceProgress, float cleanupProgress,
|
||||||
|
State runState, JobPriority jp,
|
||||||
|
String user, String jobName, String queue,
|
||||||
|
String jobFile, String trackingUrl, boolean isUber) {
|
||||||
this.jobid = jobid;
|
this.jobid = jobid;
|
||||||
this.setupProgress = setupProgress;
|
this.setupProgress = setupProgress;
|
||||||
this.mapProgress = mapProgress;
|
this.mapProgress = mapProgress;
|
||||||
@ -161,8 +186,9 @@ public JobStatus(JobID jobid, float setupProgress, float mapProgress,
|
|||||||
this.jobName = jobName;
|
this.jobName = jobName;
|
||||||
this.jobFile = jobFile;
|
this.jobFile = jobFile;
|
||||||
this.trackingUrl = trackingUrl;
|
this.trackingUrl = trackingUrl;
|
||||||
|
this.isUber = isUber;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the map progress of this job
|
* Sets the map progress of this job
|
||||||
@ -411,6 +437,7 @@ public synchronized void write(DataOutput out) throws IOException {
|
|||||||
Text.writeString(out, jobName);
|
Text.writeString(out, jobName);
|
||||||
Text.writeString(out, trackingUrl);
|
Text.writeString(out, trackingUrl);
|
||||||
Text.writeString(out, jobFile);
|
Text.writeString(out, jobFile);
|
||||||
|
out.writeBoolean(isUber);
|
||||||
|
|
||||||
// Serialize the job's ACLs
|
// Serialize the job's ACLs
|
||||||
out.writeInt(jobACLs.size());
|
out.writeInt(jobACLs.size());
|
||||||
@ -438,6 +465,7 @@ public synchronized void readFields(DataInput in) throws IOException {
|
|||||||
this.jobName = Text.readString(in);
|
this.jobName = Text.readString(in);
|
||||||
this.trackingUrl = Text.readString(in);
|
this.trackingUrl = Text.readString(in);
|
||||||
this.jobFile = Text.readString(in);
|
this.jobFile = Text.readString(in);
|
||||||
|
this.isUber = in.readBoolean();
|
||||||
|
|
||||||
// De-serialize the job's ACLs
|
// De-serialize the job's ACLs
|
||||||
int numACLs = in.readInt();
|
int numACLs = in.readInt();
|
||||||
@ -562,9 +590,26 @@ public void setNeededMem(int n) {
|
|||||||
this.neededMem = n;
|
this.neededMem = n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether job running in uber mode
|
||||||
|
* @return job in uber-mode
|
||||||
|
*/
|
||||||
|
public synchronized boolean isUber() {
|
||||||
|
return isUber;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set uber-mode flag
|
||||||
|
* @param isUber Whether job running in uber-mode
|
||||||
|
*/
|
||||||
|
public synchronized void setUber(boolean isUber) {
|
||||||
|
this.isUber = isUber;
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuffer buffer = new StringBuffer();
|
StringBuffer buffer = new StringBuffer();
|
||||||
buffer.append("job-id : " + jobid);
|
buffer.append("job-id : " + jobid);
|
||||||
|
buffer.append("uber-mode : " + isUber);
|
||||||
buffer.append("map-progress : " + mapProgress);
|
buffer.append("map-progress : " + mapProgress);
|
||||||
buffer.append("reduce-progress : " + reduceProgress);
|
buffer.append("reduce-progress : " + reduceProgress);
|
||||||
buffer.append("cleanup-progress : " + cleanupProgress);
|
buffer.append("cleanup-progress : " + cleanupProgress);
|
||||||
|
@ -150,6 +150,10 @@ public interface MRJobConfig {
|
|||||||
|
|
||||||
public static final String NUM_REDUCE_PROFILES = "mapreduce.task.profile.reduces";
|
public static final String NUM_REDUCE_PROFILES = "mapreduce.task.profile.reduces";
|
||||||
|
|
||||||
|
public static final String TASK_MAP_PROFILE_PARAMS = "mapreduce.task.profile.map.params";
|
||||||
|
|
||||||
|
public static final String TASK_REDUCE_PROFILE_PARAMS = "mapreduce.task.profile.reduce.params";
|
||||||
|
|
||||||
public static final String TASK_TIMEOUT = "mapreduce.task.timeout";
|
public static final String TASK_TIMEOUT = "mapreduce.task.timeout";
|
||||||
|
|
||||||
public static final String TASK_ID = "mapreduce.task.id";
|
public static final String TASK_ID = "mapreduce.task.id";
|
||||||
@ -298,12 +302,6 @@ public interface MRJobConfig {
|
|||||||
"mapreduce.job.ubertask.maxreduces";
|
"mapreduce.job.ubertask.maxreduces";
|
||||||
public static final String JOB_UBERTASK_MAXBYTES =
|
public static final String JOB_UBERTASK_MAXBYTES =
|
||||||
"mapreduce.job.ubertask.maxbytes";
|
"mapreduce.job.ubertask.maxbytes";
|
||||||
public static final String UBERTASK_JAVA_OPTS =
|
|
||||||
"mapreduce.ubertask.child.java.opts"; // or mapreduce.uber.java.opts?
|
|
||||||
public static final String UBERTASK_ULIMIT =
|
|
||||||
"mapreduce.ubertask.child.ulimit"; // or mapreduce.uber.ulimit?
|
|
||||||
public static final String UBERTASK_ENV =
|
|
||||||
"mapreduce.ubertask.child.env"; // or mapreduce.uber.env?
|
|
||||||
|
|
||||||
public static final String MR_PREFIX = "yarn.app.mapreduce.";
|
public static final String MR_PREFIX = "yarn.app.mapreduce.";
|
||||||
|
|
||||||
|
@ -63,17 +63,20 @@ public void setUp() throws IOException {
|
|||||||
when(cluster.getConf()).thenReturn(conf);
|
when(cluster.getConf()).thenReturn(conf);
|
||||||
when(cluster.getClient()).thenReturn(clientProtocol);
|
when(cluster.getClient()).thenReturn(clientProtocol);
|
||||||
JobStatus jobStatus = new JobStatus(new JobID("job_000", 1), 0f, 0f, 0f, 0f,
|
JobStatus jobStatus = new JobStatus(new JobID("job_000", 1), 0f, 0f, 0f, 0f,
|
||||||
State.RUNNING, JobPriority.HIGH, "tmp-user", "tmp-jobname", "tmp-jobfile", "tmp-url");
|
State.RUNNING, JobPriority.HIGH, "tmp-user", "tmp-jobname",
|
||||||
|
"tmp-jobfile", "tmp-url");
|
||||||
job = Job.getInstance(cluster, jobStatus, conf);
|
job = Job.getInstance(cluster, jobStatus, conf);
|
||||||
job = spy(job);
|
job = spy(job);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testJobMonitorAndPrint() throws Exception {
|
public void testJobMonitorAndPrint() throws Exception {
|
||||||
JobStatus jobStatus_1 = new JobStatus(new JobID("job_000", 1), 1f, 0.1f, 0.1f, 0f,
|
JobStatus jobStatus_1 = new JobStatus(new JobID("job_000", 1), 1f, 0.1f,
|
||||||
State.RUNNING, JobPriority.HIGH, "tmp-user", "tmp-jobname", "tmp-jobfile", "tmp-url");
|
0.1f, 0f, State.RUNNING, JobPriority.HIGH, "tmp-user", "tmp-jobname",
|
||||||
JobStatus jobStatus_2 = new JobStatus(new JobID("job_000", 1), 1f, 1f, 1f, 1f,
|
"tmp-queue", "tmp-jobfile", "tmp-url", true);
|
||||||
State.SUCCEEDED, JobPriority.HIGH, "tmp-user", "tmp-jobname", "tmp-jobfile", "tmp-url");
|
JobStatus jobStatus_2 = new JobStatus(new JobID("job_000", 1), 1f, 1f,
|
||||||
|
1f, 1f, State.SUCCEEDED, JobPriority.HIGH, "tmp-user", "tmp-jobname",
|
||||||
|
"tmp-queue", "tmp-jobfile", "tmp-url", true);
|
||||||
|
|
||||||
doAnswer(
|
doAnswer(
|
||||||
new Answer<TaskCompletionEvent[]>() {
|
new Answer<TaskCompletionEvent[]>() {
|
||||||
@ -102,15 +105,21 @@ public TaskCompletionEvent[] answer(InvocationOnMock invocation)
|
|||||||
String line;
|
String line;
|
||||||
boolean foundHundred = false;
|
boolean foundHundred = false;
|
||||||
boolean foundComplete = false;
|
boolean foundComplete = false;
|
||||||
String match_1 = "map 100% reduce 100%";
|
boolean foundUber = false;
|
||||||
String match_2 = "completed successfully";
|
String match_1 = "uber mode : true";
|
||||||
|
String match_2 = "map 100% reduce 100%";
|
||||||
|
String match_3 = "completed successfully";
|
||||||
while ((line = r.readLine()) != null) {
|
while ((line = r.readLine()) != null) {
|
||||||
foundHundred = line.contains(match_1);
|
if (line.contains(match_1)) {
|
||||||
|
foundUber = true;
|
||||||
|
}
|
||||||
|
foundHundred = line.contains(match_2);
|
||||||
if (foundHundred)
|
if (foundHundred)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
line = r.readLine();
|
line = r.readLine();
|
||||||
foundComplete = line.contains(match_2);
|
foundComplete = line.contains(match_3);
|
||||||
|
assertTrue(foundUber);
|
||||||
assertTrue(foundHundred);
|
assertTrue(foundHundred);
|
||||||
assertTrue(foundComplete);
|
assertTrue(foundComplete);
|
||||||
}
|
}
|
||||||
|
@ -107,6 +107,7 @@ public CompletedJob(Configuration conf, JobId jobId, Path historyFile,
|
|||||||
report.setTrackingUrl(JobHistoryUtils.getHistoryUrl(conf, TypeConverter
|
report.setTrackingUrl(JobHistoryUtils.getHistoryUrl(conf, TypeConverter
|
||||||
.toYarn(TypeConverter.fromYarn(jobId)).getAppId()));
|
.toYarn(TypeConverter.fromYarn(jobId)).getAppId()));
|
||||||
report.setAMInfos(getAMInfos());
|
report.setAMInfos(getAMInfos());
|
||||||
|
report.setIsUber(isUber());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -168,7 +168,7 @@ public void testReconnectOnAMRestart() throws IOException {
|
|||||||
GetJobReportResponse jobReportResponse1 = mock(GetJobReportResponse.class);
|
GetJobReportResponse jobReportResponse1 = mock(GetJobReportResponse.class);
|
||||||
when(jobReportResponse1.getJobReport()).thenReturn(
|
when(jobReportResponse1.getJobReport()).thenReturn(
|
||||||
MRBuilderUtils.newJobReport(jobId, "jobName-firstGen", "user",
|
MRBuilderUtils.newJobReport(jobId, "jobName-firstGen", "user",
|
||||||
JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "anything", null));
|
JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "anything", null, false));
|
||||||
|
|
||||||
// First AM returns a report with jobName firstGen and simulates AM shutdown
|
// First AM returns a report with jobName firstGen and simulates AM shutdown
|
||||||
// on second invocation.
|
// on second invocation.
|
||||||
@ -180,7 +180,7 @@ public void testReconnectOnAMRestart() throws IOException {
|
|||||||
GetJobReportResponse jobReportResponse2 = mock(GetJobReportResponse.class);
|
GetJobReportResponse jobReportResponse2 = mock(GetJobReportResponse.class);
|
||||||
when(jobReportResponse2.getJobReport()).thenReturn(
|
when(jobReportResponse2.getJobReport()).thenReturn(
|
||||||
MRBuilderUtils.newJobReport(jobId, "jobName-secondGen", "user",
|
MRBuilderUtils.newJobReport(jobId, "jobName-secondGen", "user",
|
||||||
JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "anything", null));
|
JobState.RUNNING, 0, 0, 0, 0, 0, 0, 0, "anything", null, false));
|
||||||
|
|
||||||
// Second AM generation returns a report with jobName secondGen
|
// Second AM generation returns a report with jobName secondGen
|
||||||
MRClientProtocol secondGenAMProxy = mock(MRClientProtocol.class);
|
MRClientProtocol secondGenAMProxy = mock(MRClientProtocol.class);
|
||||||
|
@ -49,6 +49,7 @@ public static void setup() throws IOException {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@Test
|
||||||
public void testSleepJob()
|
public void testSleepJob()
|
||||||
throws IOException, InterruptedException, ClassNotFoundException {
|
throws IOException, InterruptedException, ClassNotFoundException {
|
||||||
if (mrCluster != null) {
|
if (mrCluster != null) {
|
||||||
@ -84,6 +85,7 @@ protected void verifySleepJobCounters(Job job) throws InterruptedException,
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@Test
|
||||||
public void testRandomWriter()
|
public void testRandomWriter()
|
||||||
throws IOException, InterruptedException, ClassNotFoundException {
|
throws IOException, InterruptedException, ClassNotFoundException {
|
||||||
super.testRandomWriter();
|
super.testRandomWriter();
|
||||||
@ -101,6 +103,7 @@ protected void verifyRandomWriterCounters(Job job)
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@Test
|
||||||
public void testFailingMapper()
|
public void testFailingMapper()
|
||||||
throws IOException, InterruptedException, ClassNotFoundException {
|
throws IOException, InterruptedException, ClassNotFoundException {
|
||||||
LOG.info("\n\n\nStarting uberized testFailingMapper().");
|
LOG.info("\n\n\nStarting uberized testFailingMapper().");
|
||||||
|
Loading…
Reference in New Issue
Block a user