YARN-8679. [ATSv2] If the HBase cluster is down for a long time, there is a high chance that the NM ContainerManager dispatcher gets blocked. Contributed by Wangda Tan.
This commit is contained in:
parent
79c97f6a0b
commit
4aacbfff60
@ -90,7 +90,7 @@ public class TestAMLaunchFailure {
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public void addApplication(ApplicationId applicationId,
|
||||
// public void addApplicationIfAbsent(ApplicationId applicationId,
|
||||
// ApplicationMaster master, String user, String queue, Priority priority
|
||||
// , ApplicationStore appStore)
|
||||
// throws IOException {
|
||||
|
@ -67,7 +67,7 @@ public class TestSchedulerNegotiator {
|
||||
// return null;
|
||||
// }
|
||||
// @Override
|
||||
// public void addApplication(ApplicationId applicationId,
|
||||
// public void addApplicationIfAbsent(ApplicationId applicationId,
|
||||
// ApplicationMaster master, String user, String queue, Priority priority,
|
||||
// ApplicationStore store)
|
||||
// throws IOException {
|
||||
|
@ -81,7 +81,8 @@ public static void setupClass() throws Exception {
|
||||
auxService =
|
||||
PerNodeTimelineCollectorsAuxService.launchServer(new String[0],
|
||||
collectorManager, conf);
|
||||
auxService.addApplication(ApplicationId.newInstance(0, 1), "user");
|
||||
auxService
|
||||
.addApplicationIfAbsent(ApplicationId.newInstance(0, 1), "user");
|
||||
} catch (ExitUtil.ExitException e) {
|
||||
fail();
|
||||
}
|
||||
|
@ -210,7 +210,7 @@ public void initialize() throws Exception {
|
||||
YarnConfiguration.TIMELINE_SERVICE_PRINCIPAL, "localhost");
|
||||
}
|
||||
ApplicationId appId = ApplicationId.newInstance(0, 1);
|
||||
auxService.addApplication(
|
||||
auxService.addApplicationIfAbsent(
|
||||
appId, UserGroupInformation.getCurrentUser().getUserName());
|
||||
if (!withKerberosLogin) {
|
||||
AppLevelTimelineCollector collector =
|
||||
|
@ -125,7 +125,7 @@ protected void serviceStop() throws Exception {
|
||||
* @param user Application Master container user.
|
||||
* @return whether it was added successfully
|
||||
*/
|
||||
public boolean addApplication(ApplicationId appId, String user) {
|
||||
public boolean addApplicationIfAbsent(ApplicationId appId, String user) {
|
||||
AppLevelTimelineCollector collector =
|
||||
new AppLevelTimelineCollectorWithAgg(appId, user);
|
||||
return (collectorManager.putIfAbsent(appId, collector)
|
||||
@ -156,15 +156,15 @@ public void initializeContainer(ContainerInitializationContext context) {
|
||||
if (context.getContainerType() == ContainerType.APPLICATION_MASTER) {
|
||||
ApplicationId appId = context.getContainerId().
|
||||
getApplicationAttemptId().getApplicationId();
|
||||
synchronized (appIdToContainerId) {
|
||||
synchronized (appIdToContainerId){
|
||||
Set<ContainerId> masterContainers = appIdToContainerId.get(appId);
|
||||
if (masterContainers == null) {
|
||||
masterContainers = new HashSet<>();
|
||||
appIdToContainerId.put(appId, masterContainers);
|
||||
}
|
||||
masterContainers.add(context.getContainerId());
|
||||
addApplication(appId, context.getUser());
|
||||
}
|
||||
addApplicationIfAbsent(appId, context.getUser());
|
||||
}
|
||||
}
|
||||
|
||||
@ -189,6 +189,7 @@ protected Future removeApplicationCollector(final ContainerId containerId) {
|
||||
containerId.getApplicationAttemptId().getApplicationId();
|
||||
return scheduler.schedule(new Runnable() {
|
||||
public void run() {
|
||||
boolean shouldRemoveApplication = false;
|
||||
synchronized (appIdToContainerId) {
|
||||
Set<ContainerId> masterContainers = appIdToContainerId.get(appId);
|
||||
if (masterContainers == null) {
|
||||
@ -199,10 +200,14 @@ public void run() {
|
||||
masterContainers.remove(containerId);
|
||||
if (masterContainers.size() == 0) {
|
||||
// remove only if it is last master container
|
||||
removeApplication(appId);
|
||||
shouldRemoveApplication = true;
|
||||
appIdToContainerId.remove(appId);
|
||||
}
|
||||
}
|
||||
|
||||
if (shouldRemoveApplication) {
|
||||
removeApplication(appId);
|
||||
}
|
||||
}
|
||||
}, collectorLingerPeriod, TimeUnit.MILLISECONDS);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user