Configurable timeout between YARNRunner terminate the application and forcefully kill. Contributed by Eric Payne.
This commit is contained in:
parent
7711049837
commit
d39bc903a0
@ -335,6 +335,9 @@ Release 2.7.0 - UNRELEASED
|
|||||||
MAPREDUCE-6267. Refactor JobSubmitter#copyAndConfigureFiles into it's own
|
MAPREDUCE-6267. Refactor JobSubmitter#copyAndConfigureFiles into it's own
|
||||||
class. (Chris Trezzo via kasha)
|
class. (Chris Trezzo via kasha)
|
||||||
|
|
||||||
|
MAPREDUCE-6263. Configurable timeout between YARNRunner terminate the
|
||||||
|
application and forcefully kill. (Eric Payne via junping_du)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
MAPREDUCE-6169. MergeQueue should release reference to the current item
|
MAPREDUCE-6169. MergeQueue should release reference to the current item
|
||||||
|
@ -644,6 +644,11 @@ public interface MRJobConfig {
|
|||||||
public static final int DEFAULT_MR_AM_HISTORY_USE_BATCHED_FLUSH_QUEUE_SIZE_THRESHOLD =
|
public static final int DEFAULT_MR_AM_HISTORY_USE_BATCHED_FLUSH_QUEUE_SIZE_THRESHOLD =
|
||||||
50;
|
50;
|
||||||
|
|
||||||
|
public static final String MR_AM_HARD_KILL_TIMEOUT_MS =
|
||||||
|
MR_AM_PREFIX + "hard-kill-timeout-ms";
|
||||||
|
public static final long DEFAULT_MR_AM_HARD_KILL_TIMEOUT_MS =
|
||||||
|
10 * 1000l;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The threshold in terms of seconds after which an unsatisfied mapper request
|
* The threshold in terms of seconds after which an unsatisfied mapper request
|
||||||
* triggers reducer preemption to free space. Default 0 implies that the reduces
|
* triggers reducer preemption to free space. Default 0 implies that the reduces
|
||||||
|
@ -1359,6 +1359,14 @@
|
|||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>yarn.app.mapreduce.am.hard-kill-timeout-ms</name>
|
||||||
|
<value>10000</value>
|
||||||
|
<description>
|
||||||
|
Number of milliseconds to wait before the job client kills the application.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>CLASSPATH for MR applications. A comma-separated list
|
<description>CLASSPATH for MR applications. A comma-separated list
|
||||||
of CLASSPATH entries. If mapreduce.application.framework is set then this
|
of CLASSPATH entries. If mapreduce.application.framework is set then this
|
||||||
|
@ -640,7 +640,10 @@ public void killJob(JobID arg0) throws IOException, InterruptedException {
|
|||||||
clientCache.getClient(arg0).killJob(arg0);
|
clientCache.getClient(arg0).killJob(arg0);
|
||||||
long currentTimeMillis = System.currentTimeMillis();
|
long currentTimeMillis = System.currentTimeMillis();
|
||||||
long timeKillIssued = currentTimeMillis;
|
long timeKillIssued = currentTimeMillis;
|
||||||
while ((currentTimeMillis < timeKillIssued + 10000L)
|
long killTimeOut =
|
||||||
|
conf.getLong(MRJobConfig.MR_AM_HARD_KILL_TIMEOUT_MS,
|
||||||
|
MRJobConfig.DEFAULT_MR_AM_HARD_KILL_TIMEOUT_MS);
|
||||||
|
while ((currentTimeMillis < timeKillIssued + killTimeOut)
|
||||||
&& !isJobInTerminalState(status)) {
|
&& !isJobInTerminalState(status)) {
|
||||||
try {
|
try {
|
||||||
Thread.sleep(1000L);
|
Thread.sleep(1000L);
|
||||||
|
@ -201,6 +201,32 @@ public ClientServiceDelegate answer(InvocationOnMock invocation)
|
|||||||
verify(clientDelegate).killJob(jobId);
|
verify(clientDelegate).killJob(jobId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout=60000)
|
||||||
|
public void testJobKillTimeout() throws Exception {
|
||||||
|
long timeToWaitBeforeHardKill =
|
||||||
|
10000 + MRJobConfig.DEFAULT_MR_AM_HARD_KILL_TIMEOUT_MS;
|
||||||
|
conf.setLong(MRJobConfig.MR_AM_HARD_KILL_TIMEOUT_MS,
|
||||||
|
timeToWaitBeforeHardKill);
|
||||||
|
clientDelegate = mock(ClientServiceDelegate.class);
|
||||||
|
doAnswer(
|
||||||
|
new Answer<ClientServiceDelegate>() {
|
||||||
|
@Override
|
||||||
|
public ClientServiceDelegate answer(InvocationOnMock invocation)
|
||||||
|
throws Throwable {
|
||||||
|
return clientDelegate;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
).when(clientCache).getClient(any(JobID.class));
|
||||||
|
when(clientDelegate.getJobStatus(any(JobID.class))).thenReturn(new
|
||||||
|
org.apache.hadoop.mapreduce.JobStatus(jobId, 0f, 0f, 0f, 0f,
|
||||||
|
State.RUNNING, JobPriority.HIGH, "tmp", "tmp", "tmp", "tmp"));
|
||||||
|
long startTimeMillis = System.currentTimeMillis();
|
||||||
|
yarnRunner.killJob(jobId);
|
||||||
|
assertTrue("killJob should have waited at least " + timeToWaitBeforeHardKill
|
||||||
|
+ " ms.", System.currentTimeMillis() - startTimeMillis
|
||||||
|
>= timeToWaitBeforeHardKill);
|
||||||
|
}
|
||||||
|
|
||||||
@Test(timeout=20000)
|
@Test(timeout=20000)
|
||||||
public void testJobSubmissionFailure() throws Exception {
|
public void testJobSubmissionFailure() throws Exception {
|
||||||
when(resourceMgrDelegate.submitApplication(any(ApplicationSubmissionContext.class))).
|
when(resourceMgrDelegate.submitApplication(any(ApplicationSubmissionContext.class))).
|
||||||
|
Loading…
Reference in New Issue
Block a user