YARN-4582. Label-related invalid resource request exception should be able to be properly handled by application. (Bibin A Chundatt via wangda)
This commit is contained in:
parent
56b9500bbd
commit
9e792da014
@ -62,6 +62,7 @@
|
||||
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hadoop.util.StringInterner;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
|
||||
@ -79,6 +80,7 @@
|
||||
import org.apache.hadoop.yarn.client.api.NMTokenCache;
|
||||
import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException;
|
||||
import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException;
|
||||
import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
||||
@ -740,6 +742,16 @@ private List<Container> getResources() throws Exception {
|
||||
register();
|
||||
addOutstandingRequestOnResync();
|
||||
return null;
|
||||
} catch (InvalidLabelResourceRequestException e) {
|
||||
// If an invalid label exception is received, it means the requested label
|
||||
// doesn't have access, so kill the job in this case.
|
||||
String diagMsg = "Requested node-label-expression is invalid: "
|
||||
+ StringUtils.stringifyException(e);
|
||||
LOG.info(diagMsg);
|
||||
JobId jobId = this.getJob().getID();
|
||||
eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg));
|
||||
eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL));
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
// This can happen when the connection to the RM has gone down. Keep
|
||||
// re-trying until the retryInterval has expired.
|
||||
|
@ -686,6 +686,9 @@ Release 2.8.0 - UNRELEASED
|
||||
YARN-4537. Pull out priority comparison from fifocomparator and use compound
|
||||
comparator for FifoOrdering policy. (Rohith Sharma K S via jianhe)
|
||||
|
||||
YARN-4582. Label-related invalid resource request exception should be able to
|
||||
be properly handled by application. (Bibin A Chundatt via wangda)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
YARN-3339. TestDockerContainerExecutor should pull a single image and not
|
||||
|
@ -0,0 +1,49 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.exceptions;
|
||||
|
||||
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
|
||||
/**
|
||||
* This exception is thrown when a resource is requested via a
|
||||
* {@link ResourceRequest} in
|
||||
* {@link ApplicationMasterProtocol#allocate(AllocateRequest)} and the requested
|
||||
* node label expression is invalid, for example because the queue does not
* have permission to access the requested label.
|
||||
*
|
||||
*/
|
||||
public class InvalidLabelResourceRequestException
|
||||
extends InvalidResourceRequestException {
|
||||
|
||||
private static final long serialVersionUID = 13498237L;
|
||||
|
||||
/**
* Creates the exception with the given underlying cause.
*
* @param cause the throwable passed to the superclass as the cause
*/
public InvalidLabelResourceRequestException(Throwable cause) {
|
||||
super(cause);
|
||||
}
|
||||
|
||||
/**
* Creates the exception with the given detail message.
*
* @param message the detail message passed to the superclass
*/
public InvalidLabelResourceRequestException(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
/**
* Creates the exception with the given detail message and underlying cause.
*
* @param message the detail message passed to the superclass
* @param cause the throwable passed to the superclass as the cause
*/
public InvalidLabelResourceRequestException(String message, Throwable cause) {
|
||||
super(message, cause);
|
||||
}
|
||||
|
||||
}
|
@ -32,6 +32,7 @@
|
||||
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException;
|
||||
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
|
||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||
@ -291,7 +292,7 @@ private static void validateResourceRequest(ResourceRequest resReq,
|
||||
|
||||
// we don't allow specifying a label expression with more than one node label for now
|
||||
if (labelExp != null && labelExp.contains("&&")) {
|
||||
throw new InvalidResourceRequestException(
|
||||
throw new InvalidLabelResourceRequestException(
|
||||
"Invailid resource request, queue=" + queueInfo.getQueueName()
|
||||
+ " specified more than one node label "
|
||||
+ "in a node label expression, node label expression = "
|
||||
@ -301,7 +302,8 @@ private static void validateResourceRequest(ResourceRequest resReq,
|
||||
if (labelExp != null && !labelExp.trim().isEmpty() && queueInfo != null) {
|
||||
if (!checkQueueLabelExpression(queueInfo.getAccessibleNodeLabels(),
|
||||
labelExp, rmContext)) {
|
||||
throw new InvalidResourceRequestException("Invalid resource request"
|
||||
throw new InvalidLabelResourceRequestException(
|
||||
"Invalid resource request"
|
||||
+ ", queue="
|
||||
+ queueInfo.getQueueName()
|
||||
+ " doesn't have permission to access all labels "
|
||||
|
@ -60,6 +60,7 @@
|
||||
import org.apache.hadoop.yarn.api.records.impl.pb.ResourceRequestPBImpl;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.event.EventHandler;
|
||||
import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException;
|
||||
import org.apache.hadoop.yarn.exceptions.InvalidResourceBlacklistRequestException;
|
||||
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
|
||||
import org.apache.hadoop.yarn.ipc.YarnRPC;
|
||||
@ -191,7 +192,7 @@ public void testNormalizeRequestWithDominantResourceCalculator() {
|
||||
assertEquals(2048, ask.getCapability().getMemory());
|
||||
}
|
||||
|
||||
@Test (timeout = 30000)
|
||||
@Test(timeout = 30000)
|
||||
public void testValidateResourceRequestWithErrorLabelsPermission()
|
||||
throws IOException {
|
||||
// mock queue and scheduler
|
||||
@ -336,7 +337,7 @@ public void testValidateResourceRequestWithErrorLabelsPermission()
|
||||
e.printStackTrace();
|
||||
fail("Should be valid when request labels is empty");
|
||||
}
|
||||
|
||||
boolean invalidlabelexception=false;
|
||||
// queue doesn't have label, failed (when request any label)
|
||||
try {
|
||||
// set queue accessible node labels to empty
|
||||
@ -354,12 +355,15 @@ public void testValidateResourceRequestWithErrorLabelsPermission()
|
||||
SchedulerUtils.normalizeAndvalidateRequest(resReq, maxResource, "queue",
|
||||
scheduler, rmContext);
|
||||
fail("Should fail");
|
||||
} catch (InvalidLabelResourceRequestException e) {
|
||||
invalidlabelexception=true;
|
||||
} catch (InvalidResourceRequestException e) {
|
||||
} finally {
|
||||
rmContext.getNodeLabelManager().removeFromClusterNodeLabels(
|
||||
Arrays.asList("x"));
|
||||
}
|
||||
|
||||
Assert.assertTrue("InvalidLabelResourceRequestException excpeted",
|
||||
invalidlabelexception);
|
||||
// queue is "*", always succeeded
|
||||
try {
|
||||
// set queue accessible node labels to empty
|
||||
|
Loading…
Reference in New Issue
Block a user