YARN-10437. Destroy yarn service if any YarnException occurs during submitApp. Contributed by D M Murali Krishna Reddy

(cherry picked from commit 2d62dced4b)
This commit is contained in:
Brahma Reddy Battula 2021-03-30 09:39:00 +05:30
parent d6aaf516d9
commit e9d8f16a70
2 changed files with 55 additions and 2 deletions

View File

@ -557,7 +557,13 @@ public ApplicationId actionCreate(Service service)
// Write the definition first and then submit - AM will read the definition
ServiceApiUtil.createDirAndPersistApp(fs, appDir, service);
ApplicationId appId = submitApp(service);
ApplicationId appId;
try {
appId = submitApp(service);
} catch(YarnException e){
actionDestroy(serviceName);
throw e;
}
cachedAppInfo.put(serviceName, new AppInfo(appId, service
.getKerberosPrincipal().getPrincipalName()));
service.setId(appId.toString());
@ -1362,7 +1368,13 @@ public ApplicationId actionStartAndGetId(String serviceName) throws
ServiceApiUtil.validateAndResolveService(service, fs, getConfig());
// see if it is actually running and bail out;
verifyNoLiveAppInRM(serviceName, "start");
ApplicationId appId = submitApp(service);
ApplicationId appId;
try {
appId = submitApp(service);
} catch (YarnException e) {
actionDestroy(serviceName);
throw e;
}
cachedAppInfo.put(serviceName, new AppInfo(appId, service
.getKerberosPrincipal().getPrincipalName()));
service.setId(appId.toString());

View File

@ -41,10 +41,12 @@
import org.apache.hadoop.yarn.service.api.records.PlacementPolicy;
import org.apache.hadoop.yarn.service.api.records.PlacementScope;
import org.apache.hadoop.yarn.service.api.records.PlacementType;
import org.apache.hadoop.yarn.service.api.records.Resource;
import org.apache.hadoop.yarn.service.api.records.Service;
import org.apache.hadoop.yarn.service.api.records.ServiceState;
import org.apache.hadoop.yarn.service.client.ServiceClient;
import org.apache.hadoop.yarn.service.conf.YarnServiceConstants;
import org.apache.hadoop.yarn.service.exceptions.SliderException;
import org.apache.hadoop.yarn.service.utils.ServiceApiUtil;
import org.apache.hadoop.yarn.service.utils.SliderFileSystem;
import org.hamcrest.CoreMatchers;
@ -981,4 +983,43 @@ public void testAMFailureValidity() throws Exception {
Assert.assertEquals(ServiceState.STABLE, client.getStatus(
exampleApp.getName()).getState());
}
/**
 * Builds a service definition named "example-app" (version "v1") that holds
 * a single component called "sleep" with one container.
 *
 * @param memory value stored, as a decimal string, in the memory field of
 *               the component's resource
 * @return the assembled service definition
 */
public Service createServiceWithSingleComp(int memory){
  // Resource request for the component: caller-supplied memory, one CPU.
  Resource sleepResource = new Resource();
  sleepResource.setMemory(String.valueOf(memory));
  sleepResource.setCpus(1);

  // The only component of the service: one container running a sleep.
  Component sleepComponent = new Component();
  sleepComponent.setName("sleep");
  sleepComponent.setNumberOfContainers(1L);
  sleepComponent.setLaunchCommand("sleep 1000");
  sleepComponent.setResource(sleepResource);

  Service definition = new Service();
  definition.setName("example-app");
  definition.setVersion("v1");
  definition.addComponent(sleepComponent);
  return definition;
}
/**
 * Checks that a service name can be reused after an earlier create call
 * with that name failed.
 *
 * The first create asks for 1024000 memory and must be rejected with a
 * YarnException. A second create with the same name and 128 memory must
 * then not fail with a SliderException; the service is awaited until
 * stable, stopped, and destroyed.
 *
 * @throws Exception if setup or any client call fails unexpectedly
 */
@Test(timeout = 200000)
public void testServiceSameNameWithFailure() throws Exception{
  setupInternal(NUM_NMS);
  ServiceClient client = createClient(getConf());
  try {
    client.actionCreate(createServiceWithSingleComp(1024000));
    Assert.fail("Service should throw YarnException as memory is " +
        "configured as 1000GB, which is more than allowed");
  } catch (YarnException expected) {
    // Expected outcome: the oversized request is rejected. Nothing to
    // assert here; reaching Assert.fail above is the failure path.
  }
  Service service = createServiceWithSingleComp(128);
  try {
    client.actionCreate(service);
  } catch (SliderException e){
    // Keep the original exception as the cause so the test report shows
    // why the second submission was refused.
    throw new AssertionError(
        "Not able to submit service as the files related to" +
        " failed service with same name are not cleared", e);
  }
  waitForServiceToBeStable(client, service);
  client.actionStop(service.getName(), true);
  client.actionDestroy(service.getName());
}
}