YARN-9877 - Intermittent TIME_OUT of LogAggregationReport (#5784)

This commit is contained in:
K0K0V0K 2023-07-20 06:30:55 +02:00 committed by GitHub
parent 193ff1c24e
commit 82c8070e93
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 34 additions and 3 deletions

View File

@ -1088,8 +1088,13 @@ public void transition(RMAppImpl app, RMAppEvent event) {
// otherwise, add it to ranNodes for further process
app.ranNodes.add(nodeAddedEvent.getNodeId());
if (!nodeAddedEvent.isCreatedFromAcquiredState()) {
app.logAggregation.addReportIfNecessary(
nodeAddedEvent.getNodeId(), app.getApplicationId());
} else {
LOG.debug("Not considering node for log aggregation yet. nodeId: {}, appId: {}",
nodeAddedEvent.getNodeId(), app.getApplicationId());
}
}
}

View File

@ -23,13 +23,27 @@
/**
 * Application event of type {@code RMAppEventType.APP_RUNNING_ON_NODE}.
 *
 * <p>Besides the application id handed to the parent event, it carries the
 * node the event refers to and a flag telling whether the event was created
 * from the acquired state.
 */
public class RMAppRunningOnNodeEvent extends RMAppEvent {

  /** Node this event refers to. */
  private final NodeId nodeId;

  /** Value of the created-from-acquired-state flag given at construction. */
  private final boolean fromAcquiredState;

  /**
   * Creates the event with an explicit created-from-acquired-state flag.
   *
   * @param appId id of the application the event is addressed to
   * @param node node the event refers to
   * @param createdFromAcquiredState flag value later returned by
   *     {@link #isCreatedFromAcquiredState()}
   */
  public RMAppRunningOnNodeEvent(
      ApplicationId appId, NodeId node, boolean createdFromAcquiredState) {
    super(appId, RMAppEventType.APP_RUNNING_ON_NODE);
    this.nodeId = node;
    this.fromAcquiredState = createdFromAcquiredState;
  }

  /**
   * Creates the event with the created-from-acquired-state flag set to
   * {@code false}.
   *
   * @param appId id of the application the event is addressed to
   * @param node node the event refers to
   */
  public RMAppRunningOnNodeEvent(ApplicationId appId, NodeId node) {
    this(appId, node, false);
  }

  /**
   * @return the node supplied when the event was constructed
   */
  public NodeId getNodeId() {
    return nodeId;
  }

  /**
   * @return the created-from-acquired-state flag supplied at construction;
   *     {@code false} when the two-argument constructor was used
   */
  public boolean isCreatedFromAcquiredState() {
    return fromAcquiredState;
  }
}

View File

@ -606,7 +606,7 @@ public void transition(RMContainerImpl container, RMContainerEvent event) {
// Tell the app
container.eventHandler.handle(new RMAppRunningOnNodeEvent(container
.getApplicationAttemptId().getApplicationId(), container.nodeId));
.getApplicationAttemptId().getApplicationId(), container.nodeId, true));
// Opportunistic containers move directly from NEW to ACQUIRED
if (container.getState() == RMContainerState.NEW) {

View File

@ -51,6 +51,7 @@
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.DrainDispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport;
import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService;
import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEventType;
@ -962,6 +963,17 @@ public void testAppAcceptedAccepted() throws IOException {
assertAppStateLaunchTimeSaved(1234L);
}
/**
 * Checks that handling an {@code RMAppRunningOnNodeEvent} whose
 * created-from-acquired-state flag is {@code true} leaves the application
 * without any log aggregation report.
 */
@Test
public void testAcquiredReleased() throws IOException {
  RMApp app = testCreateAppSubmittedNoRecovery(null);
  NodeId acquiredNode = NodeId.newInstance("host", 1234);
  RMAppRunningOnNodeEvent acquiredEvent =
      new RMAppRunningOnNodeEvent(app.getApplicationId(), acquiredNode, true);

  app.handle(acquiredEvent);

  // No report is expected for the node after the flagged event is handled.
  Map<NodeId, LogAggregationReport> reports = app.getLogAggregationReportsForApp();
  assertEquals(0, reports.size());
}
@Test
public void testAppAcceptedAttemptKilled() throws IOException,
InterruptedException {