YARN-11709. NodeManager should be shut down or blacklisted when it cannot run program /var/lib/yarn-ce/bin/container-executor (#6960)
This commit is contained in:
parent
5f93edfd70
commit
f00094203b
@ -451,8 +451,10 @@ public void startLocalizer(LocalizerStartContext ctx)
|
||||
|
||||
} catch (PrivilegedOperationException e) {
|
||||
int exitCode = e.getExitCode();
|
||||
LOG.warn("Exit code from container {} startLocalizer is : {}",
|
||||
locId, exitCode, e);
|
||||
LOG.error("Unrecoverable issue occurred. Marking the node as unhealthy to prevent "
|
||||
+ "further containers to get scheduled on the node and cause application failures. " +
|
||||
"Exit code from the container " + locId + "startLocalizer is : " + exitCode, e);
|
||||
nmContext.getNodeStatusUpdater().reportException(e);
|
||||
|
||||
throw new IOException("Application " + appId + " initialization failed" +
|
||||
" (exitCode=" + exitCode + ") with output: " + e.getOutput(), e);
|
||||
|
@ -26,6 +26,7 @@
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.anyBoolean;
|
||||
import static org.mockito.Mockito.doAnswer;
|
||||
import static org.mockito.Mockito.doNothing;
|
||||
import static org.mockito.Mockito.doThrow;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.spy;
|
||||
@ -37,6 +38,7 @@
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.io.LineNumberReader;
|
||||
import java.lang.reflect.Field;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
@ -345,7 +347,8 @@ public void testStartLocalizer() throws IOException {
|
||||
|
||||
@Test
|
||||
public void testContainerLaunchError()
|
||||
throws IOException, ContainerExecutionException, URISyntaxException {
|
||||
throws IOException, ContainerExecutionException, URISyntaxException, IllegalAccessException,
|
||||
NoSuchFieldException {
|
||||
|
||||
final String[] expecetedMessage = {"badcommand", "Exit code: 24"};
|
||||
final String[] executor = {
|
||||
@ -387,6 +390,14 @@ public Object answer(InvocationOnMock invocationOnMock)
|
||||
dirsHandler.init(conf);
|
||||
mockExec.setConf(conf);
|
||||
|
||||
//set the private nmContext field without initing the LinuxContainerExecutor
|
||||
NodeManager nodeManager = new NodeManager();
|
||||
NodeManager.NMContext nmContext =
|
||||
nodeManager.createNMContext(null, null, null, false, conf);
|
||||
Field lceNmContext = LinuxContainerExecutor.class.getDeclaredField("nmContext");
|
||||
lceNmContext.setAccessible(true);
|
||||
lceNmContext.set(mockExec, nmContext);
|
||||
|
||||
String appSubmitter = "nobody";
|
||||
String cmd = String
|
||||
.valueOf(PrivilegedOperation.RunAsUserCommand.LAUNCH_CONTAINER.
|
||||
@ -601,8 +612,6 @@ public void testNoExitCodeFromPrivilegedOperation() throws Exception {
|
||||
LinuxContainerRuntime runtime = new DefaultLinuxContainerRuntime(
|
||||
spyPrivilegedExecutor);
|
||||
runtime.initialize(conf, null);
|
||||
mockExec = new LinuxContainerExecutor(runtime);
|
||||
mockExec.setConf(conf);
|
||||
LinuxContainerExecutor lce = new LinuxContainerExecutor(runtime) {
|
||||
@Override
|
||||
protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
|
||||
@ -610,6 +619,23 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
|
||||
}
|
||||
};
|
||||
lce.setConf(conf);
|
||||
|
||||
//set the private nmContext field without initing the LinuxContainerExecutor
|
||||
NodeManager nodeManager = new NodeManager();
|
||||
NodeManager.NMContext nmContext =
|
||||
nodeManager.createNMContext(null, null, null, false, conf);
|
||||
NodeManager.NMContext spyNmContext = spy(nmContext);
|
||||
|
||||
//initialize a mock NodeStatusUpdater
|
||||
NodeStatusUpdaterImpl nodeStatusUpdater = mock(NodeStatusUpdaterImpl.class);
|
||||
nmContext.setNodeStatusUpdater(nodeStatusUpdater);
|
||||
//imitate a void method call on the NodeStatusUpdater when setting NM unhealthy.
|
||||
doNothing().when(nodeStatusUpdater).reportException(any());
|
||||
|
||||
Field lceNmContext = LinuxContainerExecutor.class.getDeclaredField("nmContext");
|
||||
lceNmContext.setAccessible(true);
|
||||
lceNmContext.set(lce, nmContext);
|
||||
|
||||
InetSocketAddress address = InetSocketAddress.createUnresolved(
|
||||
"localhost", 8040);
|
||||
Path nmPrivateCTokensPath= new Path("file:///bin/nmPrivateCTokensPath");
|
||||
@ -672,6 +698,9 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() {
|
||||
assertTrue("Unexpected exception " + e,
|
||||
e.getMessage().contains("exit code"));
|
||||
}
|
||||
|
||||
//verify that the NM was set unhealthy on PrivilegedOperationException
|
||||
verify(nodeStatusUpdater, times(1)).reportException(any());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
Loading…
Reference in New Issue
Block a user