YARN-9194. Invalid event: REGISTERED and LAUNCH_FAILED at FAILED, and NullPointerException happens in RM while shutdown a NM. (lujie via wangda)

Change-Id: I4359f59a73a278a941f4bb9d106dd38c9cb471fe
This commit is contained in:
Wangda Tan 2019-01-17 14:20:10 -08:00
parent 0a46baecd3
commit 6d7eedfd28
2 changed files with 90 additions and 4 deletions

View File

@ -437,9 +437,11 @@ RMAppAttemptEventType.STATUS_UPDATE, new StatusUpdateTransition())
RMAppAttemptState.FAILED,
EnumSet.of(
RMAppAttemptEventType.LAUNCHED,
RMAppAttemptEventType.LAUNCH_FAILED,
RMAppAttemptEventType.EXPIRE,
RMAppAttemptEventType.KILL,
RMAppAttemptEventType.FAIL,
RMAppAttemptEventType.REGISTERED,
RMAppAttemptEventType.UNREGISTERED,
RMAppAttemptEventType.STATUS_UPDATE,
RMAppAttemptEventType.CONTAINER_ALLOCATED))
@ -1203,10 +1205,16 @@ public RMAppAttemptState transition(RMAppAttemptImpl appAttempt,
}
// Set the masterContainer
appAttempt.setMasterContainer(amContainerAllocation.getContainers()
.get(0));
Container amContainer = amContainerAllocation.getContainers().get(0);
RMContainerImpl rmMasterContainer = (RMContainerImpl)appAttempt.scheduler
.getRMContainer(appAttempt.getMasterContainer().getId());
.getRMContainer(amContainer.getId());
// When an NM is removed, the scheduler cleans up the container, so the
// RMContainer may no longer exist here. The following CONTAINER_FINISHED
// event will handle the cleaned-up container, so just return
// RMAppAttemptState.SCHEDULED.
if (rmMasterContainer == null) {
return RMAppAttemptState.SCHEDULED;
}
appAttempt.setMasterContainer(amContainer);
rmMasterContainer.setAMContainer(true);
// The node set in NMTokenSecretManager is used for marking whether the
// NMToken has been issued for this node to the AM.

View File

@ -986,7 +986,7 @@ public void testLaunchedAtFinalSaving() {
public void testAttemptAddedAtFinalSaving() {
submitApplicationAttempt();
// SUBNITED->FINAL_SAVING
// SUBMITTED->FINAL_SAVING
applicationAttempt.handle(new RMAppAttemptEvent(applicationAttempt
.getAppAttemptId(), RMAppAttemptEventType.KILL));
assertEquals(RMAppAttemptState.FINAL_SAVING,
@ -999,6 +999,56 @@ public void testAttemptAddedAtFinalSaving() {
applicationAttempt.getAppAttemptState());
}
/**
 * Checks that a REGISTERED event delivered to an attempt that has already
 * reached FAILED leaves the attempt in FAILED.
 */
@Test(timeout = 10000)
public void testAttemptRegisteredAtFailed() {
  Container masterContainer = allocateApplicationAttempt();
  launchApplicationAttempt(masterContainer);

  // Report the AM container as COMPLETE; the attempt should start saving
  // its final state.
  ApplicationAttemptId attemptId = applicationAttempt.getAppAttemptId();
  RMAppAttemptContainerFinishedEvent containerFinished =
      new RMAppAttemptContainerFinishedEvent(attemptId,
          BuilderUtils.newContainerStatus(masterContainer.getId(),
              ContainerState.COMPLETE, "", 0,
              masterContainer.getResource()),
          NodeId.newInstance("host", 1234));
  applicationAttempt.handle(containerFinished);
  assertEquals(RMAppAttemptState.FINAL_SAVING,
      applicationAttempt.getAppAttemptState());

  // Once the final state has been saved the attempt is FAILED.
  sendAttemptUpdateSavedEvent(applicationAttempt);
  assertEquals(RMAppAttemptState.FAILED,
      applicationAttempt.getAppAttemptState());

  // A late REGISTERED event must not move the attempt out of FAILED.
  RMAppAttemptEvent registered =
      new RMAppAttemptEvent(attemptId, RMAppAttemptEventType.REGISTERED);
  applicationAttempt.handle(registered);
  assertEquals(RMAppAttemptState.FAILED,
      applicationAttempt.getAppAttemptState());
}
/**
 * Checks that a LAUNCH_FAILED event delivered to an attempt that has
 * already reached FAILED leaves the attempt in FAILED.
 */
@Test
public void testAttemptLaunchFailedAtFailed() {
  Container masterContainer = allocateApplicationAttempt();
  launchApplicationAttempt(masterContainer);

  // Report the AM container as COMPLETE; the attempt should start saving
  // its final state.
  ApplicationAttemptId attemptId = applicationAttempt.getAppAttemptId();
  RMAppAttemptContainerFinishedEvent containerFinished =
      new RMAppAttemptContainerFinishedEvent(attemptId,
          BuilderUtils.newContainerStatus(masterContainer.getId(),
              ContainerState.COMPLETE, "", 0,
              masterContainer.getResource()),
          NodeId.newInstance("host", 1234));
  applicationAttempt.handle(containerFinished);
  assertEquals(RMAppAttemptState.FINAL_SAVING,
      applicationAttempt.getAppAttemptState());

  // Once the final state has been saved the attempt is FAILED.
  sendAttemptUpdateSavedEvent(applicationAttempt);
  assertEquals(RMAppAttemptState.FAILED,
      applicationAttempt.getAppAttemptState());

  // A late LAUNCH_FAILED event must not move the attempt out of FAILED.
  RMAppAttemptEvent launchFailed =
      new RMAppAttemptEvent(attemptId, RMAppAttemptEventType.LAUNCH_FAILED);
  applicationAttempt.handle(launchFailed);
  assertEquals(RMAppAttemptState.FAILED,
      applicationAttempt.getAppAttemptState());
}
@Test
public void testAMCrashAtAllocated() {
Container amContainer = allocateApplicationAttempt();
@ -1598,6 +1648,34 @@ public void testFailedToFailed() {
assertTrue(found);
}
/**
 * Checks that the attempt stays in SCHEDULED when a CONTAINER_ALLOCATED
 * event arrives but the scheduler returns no RMContainer for the allocated
 * AM container.
 */
@Test
public void testContainerRemovedBeforeAllocate() {
  scheduleApplicationAttempt();

  // Stub the AM container that the allocation will hand back.
  Container amContainer = mock(Container.class);
  Resource capability = BuilderUtils.newResource(2048, 1);
  when(amContainer.getId()).thenReturn(
      BuilderUtils.newContainerId(applicationAttempt.getAppAttemptId(), 1));
  when(amContainer.getResource()).thenReturn(capability);

  // Make the scheduler return an allocation holding only that container.
  Allocation amAllocation = mock(Allocation.class);
  when(amAllocation.getContainers())
      .thenReturn(Collections.singletonList(amContainer));
  when(scheduler.allocate(any(ApplicationAttemptId.class), any(List.class),
      any(List.class), any(List.class), any(List.class), any(List.class),
      any(ContainerUpdates.class)))
      .thenReturn(amAllocation);

  // The container has been removed, so the lookup yields null.
  when(scheduler.getRMContainer(amContainer.getId())).thenReturn(null);

  RMAppAttemptEvent containerAllocated = new RMAppAttemptEvent(
      applicationAttempt.getAppAttemptId(),
      RMAppAttemptEventType.CONTAINER_ALLOCATED);
  applicationAttempt.handle(containerAllocated);

  assertEquals(RMAppAttemptState.SCHEDULED,
      applicationAttempt.getAppAttemptState());
}
@SuppressWarnings("deprecation")
@Test