YARN-8508. Release GPU resource for killed container.
Contributed by Chandni Singh
This commit is contained in:
parent
79091cf76f
commit
ed9d60e888
@ -573,15 +573,7 @@ private int handleLaunchForLaunchType(ContainerStartContext ctx,
|
||||
return handleExitCode(e, container, containerId);
|
||||
} finally {
|
||||
resourcesHandler.postExecute(containerId);
|
||||
|
||||
try {
|
||||
if (resourceHandlerChain != null) {
|
||||
resourceHandlerChain.postComplete(containerId);
|
||||
}
|
||||
} catch (ResourceHandlerException e) {
|
||||
LOG.warn("ResourceHandlerChain.postComplete failed for " +
|
||||
"containerId: " + containerId + ". Exception: " + e);
|
||||
}
|
||||
postComplete(containerId);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -721,14 +713,7 @@ public int reacquireContainer(ContainerReacquisitionContext ctx)
|
||||
return super.reacquireContainer(ctx);
|
||||
} finally {
|
||||
resourcesHandler.postExecute(containerId);
|
||||
if (resourceHandlerChain != null) {
|
||||
try {
|
||||
resourceHandlerChain.postComplete(containerId);
|
||||
} catch (ResourceHandlerException e) {
|
||||
LOG.warn("ResourceHandlerChain.postComplete failed for " +
|
||||
"containerId: " + containerId + " Exception: " + e);
|
||||
}
|
||||
}
|
||||
postComplete(containerId);
|
||||
}
|
||||
}
|
||||
|
||||
@ -798,6 +783,8 @@ public boolean reapContainer(ContainerReapContext ctx) throws IOException {
|
||||
logOutput(e.getOutput());
|
||||
throw new IOException("Error in reaping container "
|
||||
+ container.getContainerId().toString() + " exit = " + retCode, e);
|
||||
} finally {
|
||||
postComplete(container.getContainerId());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -968,4 +955,17 @@ public void removeDockerContainer(String containerId) {
|
||||
LOG.warn("Unable to remove docker container: " + containerId);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs the post-completion step of the resource handler chain for the given
 * container.
 *
 * <p>Does nothing when no resource handler chain is set. A
 * {@code ResourceHandlerException} raised by the chain is logged at WARN
 * level and not rethrown, so callers invoking this from a {@code finally}
 * block are not interrupted by it.
 *
 * @param containerId id of the container passed to the handler chain
 */
@VisibleForTesting
|
||||
void postComplete(final ContainerId containerId) {
|
||||
try {
|
||||
// The chain may be absent; in that case there is nothing to do.
if (resourceHandlerChain != null) {
|
||||
LOG.debug("{} post complete", containerId);
|
||||
resourceHandlerChain.postComplete(containerId);
|
||||
}
|
||||
} catch (ResourceHandlerException e) {
|
||||
// Best effort: record the failure and continue. The exception is passed
// as the trailing logger argument, after the single {} placeholder value.
LOG.warn("ResourceHandlerChain.postComplete failed for " +
|
||||
"containerId: {}. Exception: ", containerId, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -25,11 +25,14 @@
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
import static org.mockito.Matchers.anyObject;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.spy;
|
||||
import static org.mockito.Mockito.times;
|
||||
import static org.mockito.Mockito.verify;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntime;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@ -40,6 +43,7 @@
|
||||
import java.io.PrintWriter;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
@ -667,12 +671,15 @@ public void testRemoveDockerContainer() throws Exception {
|
||||
/**
 * Checks that calling {@code reapContainer} on a spied
 * {@code LinuxContainerExecutor} results in exactly one
 * {@code reapContainer} invocation and exactly one {@code postComplete}
 * invocation.
 */
@Test
|
||||
public void testReapContainer() throws Exception {
|
||||
Container container = mock(Container.class);
|
||||
// NOTE(review): `lce` is declared here as a mock and again below as a spy.
// This looks like the removed and added lines of a diff shown without +/-
// markers; presumably only the spy declaration is in the real file - confirm.
LinuxContainerExecutor lce = mock(LinuxContainerExecutor.class);
|
||||
LinuxContainerRuntime containerRuntime = mock(LinuxContainerRuntime.class);
|
||||
// A spy wraps a real executor built on the mocked runtime, so the calls
// made on it can be verified below.
LinuxContainerExecutor lce = spy(new LinuxContainerExecutor(
|
||||
containerRuntime));
|
||||
ContainerReapContext.Builder builder = new ContainerReapContext.Builder();
|
||||
builder.setContainer(container).setUser("foo");
|
||||
ContainerReapContext ctx = builder.build();
|
||||
lce.reapContainer(ctx);
|
||||
verify(lce, times(1)).reapContainer(ctx);
|
||||
// Reaping must trigger the post-completion hook exactly once.
verify(lce, times(1)).postComplete(anyObject());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
Loading…
Reference in New Issue
Block a user