YARN-4582. Label-related invalid resource request exception should be able to properly handled by application. (Bibin A Chundatt via wangda)

This commit is contained in:
Wangda Tan 2016-01-12 12:18:09 +08:00
parent 56b9500bbd
commit 9e792da014
5 changed files with 75 additions and 5 deletions

View File

@ -62,6 +62,7 @@
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringInterner;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
@ -79,6 +80,7 @@
import org.apache.hadoop.yarn.client.api.NMTokenCache;
import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException;
import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException;
import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
@ -740,6 +742,16 @@ private List<Container> getResources() throws Exception {
register();
addOutstandingRequestOnResync();
return null;
} catch (InvalidLabelResourceRequestException e) {
// Receiving an invalid label exception means the requested node label is
// not accessible, so the job is killed in this case.
String diagMsg = "Requested node-label-expression is invalid: "
+ StringUtils.stringifyException(e);
LOG.info(diagMsg);
JobId jobId = this.getJob().getID();
eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
throw e;
} catch (Exception e) {
// This can happen when the connection to the RM has gone down. Keep
// re-trying until the retryInterval has expired.

View File

@ -686,6 +686,9 @@ Release 2.8.0 - UNRELEASED
YARN-4537. Pull out priority comparison from fifocomparator and use compound
comparator for FifoOrdering policy. (Rohith Sharma K S via jianhe)
YARN-4582. Label-related invalid resource request exception should be able to
properly handled by application. (Bibin A Chundatt via wangda)
OPTIMIZATIONS
YARN-3339. TestDockerContainerExecutor should pull a single image and not

View File

@ -0,0 +1,49 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.exceptions;
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
/**
 * Signals that a {@link ResourceRequest} submitted through
 * {@link ApplicationMasterProtocol#allocate(AllocateRequest)} was rejected
 * because of its node label, for example when there is no permission to
 * access the requested label.
 *
 * <p>Being a subtype of {@link InvalidResourceRequestException}, it is still
 * caught by handlers written for the more general exception, while callers
 * that care about label problems can catch this type specifically.
 */
public class InvalidLabelResourceRequestException
    extends InvalidResourceRequestException {

  private static final long serialVersionUID = 13498237L;

  /**
   * Creates the exception with a descriptive message only.
   *
   * @param message explanation of why the request was rejected
   */
  public InvalidLabelResourceRequestException(String message) {
    super(message);
  }

  /**
   * Creates the exception with a descriptive message and an underlying cause.
   *
   * @param message explanation of why the request was rejected
   * @param cause the throwable that led to this exception
   */
  public InvalidLabelResourceRequestException(String message, Throwable cause) {
    super(message, cause);
  }

  /**
   * Creates the exception wrapping an underlying cause.
   *
   * @param cause the throwable that led to this exception
   */
  public InvalidLabelResourceRequestException(Throwable cause) {
    super(cause);
  }
}

View File

@ -32,6 +32,7 @@
import org.apache.hadoop.yarn.api.records.QueueInfo;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException;
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
@ -291,7 +292,7 @@ private static void validateResourceRequest(ResourceRequest resReq,
// we currently don't allow a label expression that specifies more than one node label
if (labelExp != null && labelExp.contains("&&")) {
throw new InvalidResourceRequestException(
throw new InvalidLabelResourceRequestException(
"Invailid resource request, queue=" + queueInfo.getQueueName()
+ " specified more than one node label "
+ "in a node label expression, node label expression = "
@ -301,7 +302,8 @@ private static void validateResourceRequest(ResourceRequest resReq,
if (labelExp != null && !labelExp.trim().isEmpty() && queueInfo != null) {
if (!checkQueueLabelExpression(queueInfo.getAccessibleNodeLabels(),
labelExp, rmContext)) {
throw new InvalidResourceRequestException("Invalid resource request"
throw new InvalidLabelResourceRequestException(
"Invalid resource request"
+ ", queue="
+ queueInfo.getQueueName()
+ " doesn't have permission to access all labels "

View File

@ -60,6 +60,7 @@
import org.apache.hadoop.yarn.api.records.impl.pb.ResourceRequestPBImpl;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException;
import org.apache.hadoop.yarn.exceptions.InvalidResourceBlacklistRequestException;
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
import org.apache.hadoop.yarn.ipc.YarnRPC;
@ -191,7 +192,7 @@ public void testNormalizeRequestWithDominantResourceCalculator() {
assertEquals(2048, ask.getCapability().getMemory());
}
@Test (timeout = 30000)
@Test(timeout = 30000)
public void testValidateResourceRequestWithErrorLabelsPermission()
throws IOException {
// mock queue and scheduler
@ -336,7 +337,7 @@ public void testValidateResourceRequestWithErrorLabelsPermission()
e.printStackTrace();
fail("Should be valid when request labels is empty");
}
boolean invalidlabelexception=false;
// queue doesn't have label, failed (when request any label)
try {
// set queue accessible node labels to empty
@ -354,12 +355,15 @@ public void testValidateResourceRequestWithErrorLabelsPermission()
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue",
scheduler, rmContext);
fail("Should fail");
} catch (InvalidLabelResourceRequestException e) {
invalidlabelexception=true;
} catch (InvalidResourceRequestException e) {
} finally {
rmContext.getNodeLabelManager().removeFromClusterNodeLabels(
Arrays.asList("x"));
}
Assert.assertTrue("InvalidLabelResourceRequestException excpeted",
invalidlabelexception);
// queue is "*", always succeeded
try {
// set queue accessible node labels to empty