YARN-8679. [ATSv2] If the HBase cluster is down for a long time, there is a high chance that the NM ContainerManager dispatcher gets blocked. Contributed by Wangda Tan.

This commit is contained in:
Rohith Sharma K S 2018-08-18 10:26:55 +05:30
parent 79c97f6a0b
commit 4aacbfff60
5 changed files with 14 additions and 8 deletions

View File

@ -90,7 +90,7 @@ public class TestAMLaunchFailure {
// }
//
// @Override
// public void addApplication(ApplicationId applicationId,
// public void addApplicationIfAbsent(ApplicationId applicationId,
// ApplicationMaster master, String user, String queue, Priority priority
// , ApplicationStore appStore)
// throws IOException {

View File

@ -67,7 +67,7 @@ public class TestSchedulerNegotiator {
// return null;
// }
// @Override
// public void addApplication(ApplicationId applicationId,
// public void addApplicationIfAbsent(ApplicationId applicationId,
// ApplicationMaster master, String user, String queue, Priority priority,
// ApplicationStore store)
// throws IOException {

View File

@ -81,7 +81,8 @@ public static void setupClass() throws Exception {
auxService =
PerNodeTimelineCollectorsAuxService.launchServer(new String[0],
collectorManager, conf);
auxService.addApplication(ApplicationId.newInstance(0, 1), "user");
auxService
.addApplicationIfAbsent(ApplicationId.newInstance(0, 1), "user");
} catch (ExitUtil.ExitException e) {
fail();
}

View File

@ -210,7 +210,7 @@ public void initialize() throws Exception {
YarnConfiguration.TIMELINE_SERVICE_PRINCIPAL, "localhost");
}
ApplicationId appId = ApplicationId.newInstance(0, 1);
auxService.addApplication(
auxService.addApplicationIfAbsent(
appId, UserGroupInformation.getCurrentUser().getUserName());
if (!withKerberosLogin) {
AppLevelTimelineCollector collector =

View File

@ -125,7 +125,7 @@ protected void serviceStop() throws Exception {
* @param user Application Master container user.
* @return whether it was added successfully
*/
public boolean addApplication(ApplicationId appId, String user) {
public boolean addApplicationIfAbsent(ApplicationId appId, String user) {
AppLevelTimelineCollector collector =
new AppLevelTimelineCollectorWithAgg(appId, user);
return (collectorManager.putIfAbsent(appId, collector)
@ -156,15 +156,15 @@ public void initializeContainer(ContainerInitializationContext context) {
if (context.getContainerType() == ContainerType.APPLICATION_MASTER) {
ApplicationId appId = context.getContainerId().
getApplicationAttemptId().getApplicationId();
synchronized (appIdToContainerId) {
synchronized (appIdToContainerId){
Set<ContainerId> masterContainers = appIdToContainerId.get(appId);
if (masterContainers == null) {
masterContainers = new HashSet<>();
appIdToContainerId.put(appId, masterContainers);
}
masterContainers.add(context.getContainerId());
addApplication(appId, context.getUser());
}
addApplicationIfAbsent(appId, context.getUser());
}
}
@ -189,6 +189,7 @@ protected Future removeApplicationCollector(final ContainerId containerId) {
containerId.getApplicationAttemptId().getApplicationId();
return scheduler.schedule(new Runnable() {
public void run() {
boolean shouldRemoveApplication = false;
synchronized (appIdToContainerId) {
Set<ContainerId> masterContainers = appIdToContainerId.get(appId);
if (masterContainers == null) {
@ -199,10 +200,14 @@ public void run() {
masterContainers.remove(containerId);
if (masterContainers.size() == 0) {
// remove only if it is last master container
removeApplication(appId);
shouldRemoveApplication = true;
appIdToContainerId.remove(appId);
}
}
if (shouldRemoveApplication) {
removeApplication(appId);
}
}
}, collectorLingerPeriod, TimeUnit.MILLISECONDS);
}