From 8ebf37f3691dee523f7d800bc82c7423c3e262e9 Mon Sep 17 00:00:00 2001 From: Bikas Saha Date: Fri, 4 Oct 2013 00:23:35 +0000 Subject: [PATCH] YARN-1256. NM silently ignores non-existent service in StartContainerRequest (Xuan Gong via bikas) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1529039 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../yarn/util/AuxiliaryServiceHelper.java | 7 +- .../containermanager/AuxServices.java | 88 ++++++++++++------- .../ContainerManagerImpl.java | 13 +++ .../TestContainerManagerWithLCE.java | 11 +++ .../TestContainerManager.java | 46 ++++++++++ 6 files changed, 134 insertions(+), 34 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 3559edaeae..6840dde667 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -144,6 +144,9 @@ Release 2.1.2 - UNRELEASED YARN-1236. FairScheduler setting queue name in RMApp is not working. (Sandy Ryza) + YARN-1256. NM silently ignores non-existent service in + StartContainerRequest (Xuan Gong via bikas) + Release 2.1.1-beta - 2013-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AuxiliaryServiceHelper.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AuxiliaryServiceHelper.java index 23fc50fcec..cb118f56da 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AuxiliaryServiceHelper.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AuxiliaryServiceHelper.java @@ -30,8 +30,11 @@ public class AuxiliaryServiceHelper { public static ByteBuffer getServiceDataFromEnv(String serviceName, Map env) { - byte[] metaData = - Base64.decodeBase64(env.get(getPrefixServiceName(serviceName))); + String meta = env.get(getPrefixServiceName(serviceName)); + if (null == meta) { + return null; + } + byte[] metaData = Base64.decodeBase64(meta); return ByteBuffer.wrap(metaData); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java index 0e0e7668f4..5fe5b141bc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java @@ -175,39 +175,56 @@ public void handle(AuxServicesEvent event) { LOG.info("Got event " + event.getType() + " for appId " + event.getApplicationID()); switch (event.getType()) { - case APPLICATION_INIT: - LOG.info("Got APPLICATION_INIT for service " + event.getServiceID()); - AuxiliaryService service = serviceMap.get(event.getServiceID()); - if (null == service) { - LOG.info("service is null"); - // TODO kill all containers waiting on Application - return; - } - service.initializeApplication(new ApplicationInitializationContext(event - .getUser(), event.getApplicationID(), event.getServiceData())); - break; - case APPLICATION_STOP: - for (AuxiliaryService serv : serviceMap.values()) { - serv.stopApplication(new ApplicationTerminationContext(event - .getApplicationID())); - } - break; - case CONTAINER_INIT: - for (AuxiliaryService serv : serviceMap.values()) { - serv.initializeContainer(new ContainerInitializationContext( - event.getUser(), event.getContainer().getContainerId(), - event.getContainer().getResource())); - } - break; - case CONTAINER_STOP: - for (AuxiliaryService serv : serviceMap.values()) { - serv.stopContainer(new ContainerTerminationContext( - event.getUser(), event.getContainer().getContainerId(), - event.getContainer().getResource())); - } - break; + case APPLICATION_INIT: + LOG.info("Got APPLICATION_INIT for service " + event.getServiceID()); + AuxiliaryService service = null; + try { + service = serviceMap.get(event.getServiceID()); + service + .initializeApplication(new ApplicationInitializationContext(event + .getUser(), event.getApplicationID(), event.getServiceData())); + } catch (Throwable th) { + logWarningWhenAuxServiceThrowExceptions(service, + AuxServicesEventType.APPLICATION_INIT, th); + } + break; + case APPLICATION_STOP: + for (AuxiliaryService serv : serviceMap.values()) { + try { + serv.stopApplication(new ApplicationTerminationContext(event + .getApplicationID())); + } catch (Throwable th) { + logWarningWhenAuxServiceThrowExceptions(serv, + AuxServicesEventType.APPLICATION_STOP, th); + } + } + break; + case CONTAINER_INIT: + for (AuxiliaryService serv : serviceMap.values()) { + try { + serv.initializeContainer(new ContainerInitializationContext( + event.getUser(), event.getContainer().getContainerId(), + event.getContainer().getResource())); + } catch (Throwable th) { + logWarningWhenAuxServiceThrowExceptions(serv, + AuxServicesEventType.CONTAINER_INIT, th); + } + } + break; + case CONTAINER_STOP: + for (AuxiliaryService serv : serviceMap.values()) { + try { + serv.stopContainer(new ContainerTerminationContext( + event.getUser(), event.getContainer().getContainerId(), + event.getContainer().getResource())); + } catch (Throwable th) { + logWarningWhenAuxServiceThrowExceptions(serv, + AuxServicesEventType.CONTAINER_STOP, th); + } + } + break; default: - throw new RuntimeException("Unknown type: " + event.getType()); + throw new RuntimeException("Unknown type: " + event.getType()); } } @@ -217,4 +234,11 @@ private boolean validateAuxServiceName(String name) { } return p.matcher(name).matches(); } + + private void logWarningWhenAuxServiceThrowExceptions(AuxiliaryService service, + AuxServicesEventType eventType, Throwable th) { + LOG.warn((null == service ? "The auxService is null" + : "The auxService name is " + service.getName()) + + " and it got an error at event: " + eventType, th); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index 0af4332cef..f24a5544df 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -67,6 +67,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.exceptions.InvalidAuxServiceException; import org.apache.hadoop.yarn.exceptions.InvalidContainerException; import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException; import org.apache.hadoop.yarn.exceptions.YarnException; @@ -451,6 +452,18 @@ private void startContainerInternal(NMTokenIdentifier nmTokenIdentifier, ContainerLaunchContext launchContext = request.getContainerLaunchContext(); + Map serviceData = getAuxServiceMetaData(); + if (launchContext.getServiceData()!=null && + !launchContext.getServiceData().isEmpty()) { + for (Map.Entry meta : launchContext.getServiceData() + .entrySet()) { + if (null == serviceData.get(meta.getKey())) { + throw new InvalidAuxServiceException("The auxService:" + meta.getKey() + + " does not exist"); + } + } + } + Credentials credentials = parseCredentials(launchContext); Container container = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java index cc9b7d9ce0..a47e7f78e1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java @@ -178,6 +178,17 @@ public void testMultipleContainersStopAndGetStatus() throws Exception { super.testMultipleContainersStopAndGetStatus(); } + @Override + public void testStartContainerFailureWithUnknownAuxService() throws Exception { + // Don't run the test if the binary is not available. + if (!shouldRunTest()) { + LOG.info("LCE binary path is not passed. Not running the test"); + return; + } + LOG.info("Running testContainerLaunchFromPreviousRM"); + super.testStartContainerFailureWithUnknownAuxService(); + } + private boolean shouldRunTest() { return System .getProperty(YarnConfiguration.NM_LINUX_CONTAINER_EXECUTOR_PATH) != null; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index e5b318ee4c..90cd16e16e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.io.PrintWriter; import java.net.InetAddress; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -37,6 +38,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.service.Service; import org.apache.hadoop.util.Shell; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; @@ -59,6 +61,7 @@ import org.apache.hadoop.yarn.api.records.SerializedException; import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.api.records.URL; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.InvalidContainerException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; @@ -68,6 +71,7 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.TestAuxServices.ServiceA; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; @@ -746,6 +750,48 @@ public void testMultipleContainersStopAndGetStatus() throws Exception { } } + @Test + public void testStartContainerFailureWithUnknownAuxService() throws Exception { + conf.setStrings(YarnConfiguration.NM_AUX_SERVICES, + new String[] { "existService" }); + conf.setClass( + String.format(YarnConfiguration.NM_AUX_SERVICE_FMT, "existService"), + ServiceA.class, Service.class); + containerManager.start(); + + List startRequest = + new ArrayList(); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + Map serviceData = new HashMap(); + String serviceName = "non_exist_auxService"; + serviceData.put(serviceName, ByteBuffer.wrap(serviceName.getBytes())); + containerLaunchContext.setServiceData(serviceData); + + ContainerId cId = createContainerId(0); + String user = "start_container_fail"; + Token containerToken = + createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), + user, context.getContainerTokenSecretManager()); + StartContainerRequest request = + StartContainerRequest.newInstance(containerLaunchContext, + containerToken); + + // start containers + startRequest.add(request); + StartContainersRequest requestList = + StartContainersRequest.newInstance(startRequest); + + StartContainersResponse response = + containerManager.startContainers(requestList); + Assert.assertTrue(response.getFailedRequests().size() == 1); + Assert.assertTrue(response.getSuccessfullyStartedContainers().size() == 0); + Assert.assertTrue(response.getFailedRequests().containsKey(cId)); + Assert.assertTrue(response.getFailedRequests().get(cId).getMessage() + .contains("The auxService:" + serviceName + " does not exist")); + } + public static Token createContainerToken(ContainerId cId, long rmIdentifier, NodeId nodeId, String user, NMContainerTokenSecretManager containerTokenSecretManager)