YARN-5028. RMStateStore should trim down app state for completed applications. Contributed by Gergo Repas.

This commit is contained in:
Yufei Gu 2018-02-21 11:42:26 -08:00
parent 004b722372
commit 92cbbfe79e
3 changed files with 102 additions and 1 deletions

View File

@ -47,6 +47,7 @@
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ReservationId; import org.apache.hadoop.yarn.api.records.ReservationId;
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerLaunchContextPBImpl;
import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.HAUtil;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.AsyncDispatcher;
@ -65,6 +66,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
@ -257,6 +259,9 @@ public RMStateStoreState transition(RMStateStore store,
appState.getApplicationSubmissionContext().getApplicationId(); appState.getApplicationSubmissionContext().getApplicationId();
LOG.info("Updating info for app: " + appId); LOG.info("Updating info for app: " + appId);
try { try {
if (isAppStateFinal(appState)) {
pruneAppState(appState);
}
store.updateApplicationStateInternal(appId, appState); store.updateApplicationStateInternal(appId, appState);
if (((RMStateUpdateAppEvent) event).isNotifyApplication()) { if (((RMStateUpdateAppEvent) event).isNotifyApplication()) {
store.notifyApplication(new RMAppEvent(appId, store.notifyApplication(new RMAppEvent(appId,
@ -276,7 +281,34 @@ public RMStateStoreState transition(RMStateStore store,
} }
} }
return finalState(isFenced); return finalState(isFenced);
}; }
/**
 * Tells whether the given application state record is in a terminal state.
 *
 * @param appState the application state record to inspect
 * @return {@code true} when the recorded state is FINISHED, FAILED or
 *         KILLED; {@code false} for every other value
 */
private boolean isAppStateFinal(ApplicationStateData appState) {
  final RMAppState current = appState.getState();
  if (current == RMAppState.FINISHED) {
    return true;
  }
  if (current == RMAppState.FAILED) {
    return true;
  }
  return current == RMAppState.KILLED;
}
/**
 * Replaces the submission context held by {@code appState} with a trimmed
 * copy that carries only a handful of fields from the original context:
 * application id, resource, queue, AM container resource requests,
 * application type and the application ACLs of the AM container spec.
 *
 * <p>The passed-in {@code appState} is modified in place.
 *
 * @param appState the application state whose submission context is to be
 *                 replaced by the trimmed copy
 */
private void pruneAppState(ApplicationStateData appState) {
  ApplicationSubmissionContext srcCtx =
      appState.getApplicationSubmissionContext();
  ApplicationSubmissionContextPBImpl context =
      new ApplicationSubmissionContextPBImpl();
  // most fields in the ApplicationSubmissionContext are not needed,
  // but the following few need to be present for recovery to succeed
  context.setApplicationId(srcCtx.getApplicationId());
  context.setResource(srcCtx.getResource());
  context.setQueue(srcCtx.getQueue());
  context.setAMContainerResourceRequests(
      srcCtx.getAMContainerResourceRequests());
  context.setApplicationType(srcCtx.getApplicationType());
  ContainerLaunchContextPBImpl amContainerSpec =
      new ContainerLaunchContextPBImpl();
  // A submission context without an AM container spec must not abort the
  // state update with a NullPointerException; in that case the pruned
  // context simply gets an empty container spec with no ACLs copied.
  if (srcCtx.getAMContainerSpec() != null) {
    amContainerSpec.setApplicationACLs(
        srcCtx.getAMContainerSpec().getApplicationACLs());
  }
  context.setAMContainerSpec(amContainerSpec);
  appState.setApplicationSubmissionContext(context);
}
} }
private static class RemoveAppTransition implements private static class RemoveAppTransition implements

View File

@ -53,6 +53,7 @@
import org.apache.hadoop.yarn.api.records.ReservationId; import org.apache.hadoop.yarn.api.records.ReservationId;
import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerLaunchContextPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.Dispatcher;
@ -162,6 +163,7 @@ protected RMApp storeApp(RMStateStore store, ApplicationId appId,
ApplicationSubmissionContext context = ApplicationSubmissionContext context =
new ApplicationSubmissionContextPBImpl(); new ApplicationSubmissionContextPBImpl();
context.setApplicationId(appId); context.setApplicationId(appId);
context.setAMContainerSpec(new ContainerLaunchContextPBImpl());
RMApp mockApp = mock(RMApp.class); RMApp mockApp = mock(RMApp.class);
when(mockApp.getApplicationId()).thenReturn(appId); when(mockApp.getApplicationId()).thenReturn(appId);
@ -378,6 +380,7 @@ void testRMAppStateStore(RMStateStoreHelper stateStoreHelper,
ApplicationSubmissionContext dummyContext = ApplicationSubmissionContext dummyContext =
new ApplicationSubmissionContextPBImpl(); new ApplicationSubmissionContextPBImpl();
dummyContext.setApplicationId(dummyAppId); dummyContext.setApplicationId(dummyAppId);
dummyContext.setAMContainerSpec(new ContainerLaunchContextPBImpl());
ApplicationStateData dummyApp = ApplicationStateData dummyApp =
ApplicationStateData.newInstance(appState.getSubmitTime(), ApplicationStateData.newInstance(appState.getSubmitTime(),
appState.getStartTime(), appState.getUser(), dummyContext, appState.getStartTime(), appState.getUser(), dummyContext,

View File

@ -35,7 +35,9 @@
import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.yarn.api.records.*; import org.apache.hadoop.yarn.api.records.*;
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerLaunchContextPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl;
import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.HAUtil;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.Event;
@ -49,6 +51,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
@ -83,6 +86,7 @@
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
import java.io.IOException; import java.io.IOException;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
@ -845,6 +849,7 @@ private void finishAppWithAttempts(RMState state, RMStateStore store,
ApplicationSubmissionContext context = ApplicationSubmissionContext context =
new ApplicationSubmissionContextPBImpl(); new ApplicationSubmissionContextPBImpl();
context.setApplicationId(appId); context.setApplicationId(appId);
context.setAMContainerSpec(new ContainerLaunchContextPBImpl());
appStateNew = createAppState(context, submitTime, startTime, finishTime, appStateNew = createAppState(context, submitTime, startTime, finishTime,
true); true);
} else { } else {
@ -1488,4 +1493,65 @@ public void testDelegationTokenNodeWithSplitChangeAcrossRestarts()
tokensWithIndex, sequenceNumber, 3); tokensWithIndex, sequenceNumber, 3);
store.close(); store.close();
} }
/**
 * Verifies that the application submission context is stored unpruned while
 * the application is RUNNING and is pruned once the application state is
 * updated to FINISHED: application id, queue, AM container spec and resource
 * are still present afterwards, while the scheduling properties map is
 * empty.
 */
@Test
public void testAppSubmissionContextIsPrunedInFinalApplicationState()
    throws Exception {
  TestZKRMStateStoreTester zkTester = new TestZKRMStateStoreTester();
  ApplicationId appId = ApplicationId.fromString("application_1234_0010");
  Configuration conf = createConfForDelegationTokenNodeSplit(1);
  RMStateStore store = zkTester.getRMStateStore(conf);
  // Close the store even when an assertion or a store call fails, so a
  // failing run does not leave the store open.
  try {
    ApplicationSubmissionContext ctx =
        new ApplicationSubmissionContextPBImpl();
    ctx.setApplicationId(appId);
    ctx.setQueue("a_queue");
    ContainerLaunchContextPBImpl containerLaunchCtx =
        new ContainerLaunchContextPBImpl();
    containerLaunchCtx.setCommands(Collections.singletonList("a_command"));
    ctx.setAMContainerSpec(containerLaunchCtx);
    Resource resource = new ResourcePBImpl();
    resource.setMemorySize(17L);
    ctx.setResource(resource);
    Map<String, String> schedulingPropertiesMap =
        Collections.singletonMap("a_key", "a_value");
    ctx.setApplicationSchedulingPropertiesMap(schedulingPropertiesMap);
    ApplicationStateDataPBImpl appState = new ApplicationStateDataPBImpl();
    appState.setState(RMAppState.RUNNING);
    appState.setApplicationSubmissionContext(ctx);
    store.storeApplicationStateInternal(appId, appState);

    RMState rmState = store.loadState();
    assertEquals(1, rmState.getApplicationState().size());

    // The application is still RUNNING here, so this update must leave the
    // submission context untouched.
    store.handleStoreEvent(new RMStateUpdateAppEvent(appState, false, null));
    rmState = store.loadState();
    ctx = rmState.getApplicationState().get(appId)
        .getApplicationSubmissionContext();
    assertEquals("ApplicationSchedulingPropertiesMap should not have been "
        + "pruned from the application submission context before the "
        + "FINISHED state",
        schedulingPropertiesMap, ctx.getApplicationSchedulingPropertiesMap());

    // Moving to FINISHED triggers pruning of the submission context.
    appState.setState(RMAppState.FINISHED);
    store.handleStoreEvent(new RMStateUpdateAppEvent(appState, false, null));
    rmState = store.loadState();
    ctx = rmState.getApplicationState().get(appId)
        .getApplicationSubmissionContext();
    assertEquals(appId, ctx.getApplicationId());
    assertEquals("a_queue", ctx.getQueue());
    assertNotNull(ctx.getAMContainerSpec());
    assertEquals(17L, ctx.getResource().getMemorySize());
    assertEquals("ApplicationSchedulingPropertiesMap should have been pruned"
        + " from the application submission context when in FINISHED STATE",
        Collections.emptyMap(), ctx.getApplicationSchedulingPropertiesMap());
  } finally {
    store.close();
  }
}
} }