YARN-8862. [BackPort] [GPG] Add Yarn Registry cleanup in ApplicationCleaner. (#6083) Contributed by Shilun Fan.
Reviewed-by: Inigo Goiri <inigoiri@apache.org> Signed-off-by: Shilun Fan <slfan1989@apache.org>
This commit is contained in:
parent
35c42e4039
commit
1d2afc5cf6
@ -26,6 +26,7 @@
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
|
import org.apache.commons.collections.MapUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
import org.apache.hadoop.registry.client.api.BindFlags;
|
import org.apache.hadoop.registry.client.api.BindFlags;
|
||||||
@ -142,9 +143,7 @@ public synchronized boolean writeAMRMTokenForUAM(ApplicationId appId,
|
|||||||
// Then update the subClusterTokenMap
|
// Then update the subClusterTokenMap
|
||||||
subClusterTokenMap.put(subClusterId, token);
|
subClusterTokenMap.put(subClusterId, token);
|
||||||
} catch (YarnException | IOException e) {
|
} catch (YarnException | IOException e) {
|
||||||
LOG.error(
|
LOG.error("Failed writing AMRMToken to registry for subcluster {}.", subClusterId, e);
|
||||||
"Failed writing AMRMToken to registry for subcluster " + subClusterId,
|
|
||||||
e);
|
|
||||||
}
|
}
|
||||||
return update;
|
return update;
|
||||||
}
|
}
|
||||||
@ -189,8 +188,7 @@ public synchronized boolean writeAMRMTokenForUAM(ApplicationId appId,
|
|||||||
|
|
||||||
retMap.put(scId, amrmToken);
|
retMap.put(scId, amrmToken);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
LOG.error("Failed reading registry key " + key
|
LOG.error("Failed reading registry key {}, skipping subcluster {}.", key, scId, e);
|
||||||
+ ", skipping subcluster " + scId, e);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -202,24 +200,39 @@ public synchronized boolean writeAMRMTokenForUAM(ApplicationId appId,
|
|||||||
/**
|
/**
|
||||||
* Remove an application from registry.
|
* Remove an application from registry.
|
||||||
*
|
*
|
||||||
* @param appId application id
|
* @param appId application id.
|
||||||
*/
|
*/
|
||||||
public synchronized void removeAppFromRegistry(ApplicationId appId) {
|
public synchronized void removeAppFromRegistry(ApplicationId appId) {
|
||||||
|
removeAppFromRegistry(appId, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove an application from registry.
|
||||||
|
*
|
||||||
|
* @param appId application id
|
||||||
|
* @param ignoreMemoryState whether to ignore the memory data in terms of
|
||||||
|
* known application
|
||||||
|
*/
|
||||||
|
public synchronized void removeAppFromRegistry(ApplicationId appId,
|
||||||
|
boolean ignoreMemoryState) {
|
||||||
Map<String, Token<AMRMTokenIdentifier>> subClusterTokenMap =
|
Map<String, Token<AMRMTokenIdentifier>> subClusterTokenMap =
|
||||||
this.appSubClusterTokenMap.get(appId);
|
this.appSubClusterTokenMap.get(appId);
|
||||||
LOG.info("Removing all registry entries for {}", appId);
|
if (!ignoreMemoryState) {
|
||||||
|
if (MapUtils.isEmpty(subClusterTokenMap)) {
|
||||||
if (subClusterTokenMap == null || subClusterTokenMap.size() == 0) {
|
return;
|
||||||
return;
|
}
|
||||||
}
|
}
|
||||||
|
LOG.info("Removing all registry entries for {}.", appId);
|
||||||
|
|
||||||
// Lastly remove the application directory
|
// Lastly remove the application directory
|
||||||
String key = getRegistryKey(appId, null);
|
String key = getRegistryKey(appId, null);
|
||||||
try {
|
try {
|
||||||
removeKeyRegistry(this.registry, this.user, key, true, true);
|
removeKeyRegistry(this.registry, this.user, key, true, true);
|
||||||
subClusterTokenMap.clear();
|
if (subClusterTokenMap != null) {
|
||||||
|
subClusterTokenMap.clear();
|
||||||
|
}
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
LOG.error("Failed removing registry directory key " + key, e);
|
LOG.error("Failed removing registry directory key {}.", key, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -247,7 +260,7 @@ public String run() {
|
|||||||
}
|
}
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
if (throwIfFails) {
|
if (throwIfFails) {
|
||||||
LOG.error("Registry resolve key " + key + " failed", e);
|
LOG.error("Registry resolve key {} failed.", key, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
@ -271,7 +284,7 @@ public Boolean run() {
|
|||||||
return true;
|
return true;
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
if (throwIfFails) {
|
if (throwIfFails) {
|
||||||
LOG.error("Registry remove key " + key + " failed", e);
|
LOG.error("Registry remove key {} failed.", key, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@ -300,7 +313,7 @@ public Boolean run() {
|
|||||||
return true;
|
return true;
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
if (throwIfFails) {
|
if (throwIfFails) {
|
||||||
LOG.error("Registry write key " + key + " failed", e);
|
LOG.error("Registry write key {} failed.", key, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
@ -317,18 +330,15 @@ public Boolean run() {
|
|||||||
private List<String> listDirRegistry(final RegistryOperations registryImpl,
|
private List<String> listDirRegistry(final RegistryOperations registryImpl,
|
||||||
UserGroupInformation ugi, final String key, final boolean throwIfFails)
|
UserGroupInformation ugi, final String key, final boolean throwIfFails)
|
||||||
throws YarnException {
|
throws YarnException {
|
||||||
List<String> result = ugi.doAs(new PrivilegedAction<List<String>>() {
|
List<String> result = ugi.doAs((PrivilegedAction<List<String>>) () -> {
|
||||||
@Override
|
try {
|
||||||
public List<String> run() {
|
return registryImpl.list(key);
|
||||||
try {
|
} catch (Throwable e) {
|
||||||
return registryImpl.list(key);
|
if (throwIfFails) {
|
||||||
} catch (Throwable e) {
|
LOG.error("Registry list key {} failed.", key, e);
|
||||||
if (throwIfFails) {
|
|
||||||
LOG.error("Registry list key " + key + " failed", e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
return null;
|
||||||
});
|
});
|
||||||
if (result == null && throwIfFails) {
|
if (result == null && throwIfFails) {
|
||||||
throw new YarnException("Registry list key " + key + " failed");
|
throw new YarnException("Registry list key " + key + " failed");
|
||||||
|
@ -87,4 +87,31 @@ public void testBasicCase() {
|
|||||||
this.registryClient.loadStateFromRegistry(appId).size());
|
this.registryClient.loadStateFromRegistry(appId).size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRemoveWithMemoryState() {
|
||||||
|
ApplicationId appId1 = ApplicationId.newInstance(0, 0);
|
||||||
|
ApplicationId appId2 = ApplicationId.newInstance(0, 1);
|
||||||
|
String scId0 = "subcluster0";
|
||||||
|
|
||||||
|
this.registryClient.writeAMRMTokenForUAM(appId1, scId0, new Token<>());
|
||||||
|
this.registryClient.writeAMRMTokenForUAM(appId2, scId0, new Token<>());
|
||||||
|
Assert.assertEquals(2, this.registryClient.getAllApplications().size());
|
||||||
|
|
||||||
|
// Create a new client instance
|
||||||
|
this.registryClient =
|
||||||
|
new FederationRegistryClient(this.conf, this.registry, this.user);
|
||||||
|
|
||||||
|
this.registryClient.loadStateFromRegistry(appId2);
|
||||||
|
// Should remove app2
|
||||||
|
this.registryClient.removeAppFromRegistry(appId2, false);
|
||||||
|
Assert.assertEquals(1, this.registryClient.getAllApplications().size());
|
||||||
|
|
||||||
|
// Should not remove app1 since memory state don't have it
|
||||||
|
this.registryClient.removeAppFromRegistry(appId1, false);
|
||||||
|
Assert.assertEquals(1, this.registryClient.getAllApplications().size());
|
||||||
|
|
||||||
|
// Should remove app1
|
||||||
|
this.registryClient.removeAppFromRegistry(appId1, true);
|
||||||
|
Assert.assertEquals(0, this.registryClient.getAllApplications().size());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
package org.apache.hadoop.yarn.server.globalpolicygenerator;
|
package org.apache.hadoop.yarn.server.globalpolicygenerator;
|
||||||
|
|
||||||
|
import org.apache.hadoop.yarn.server.federation.utils.FederationRegistryClient;
|
||||||
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
|
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -32,4 +33,8 @@ public interface GPGContext {
|
|||||||
GPGPolicyFacade getPolicyFacade();
|
GPGPolicyFacade getPolicyFacade();
|
||||||
|
|
||||||
void setPolicyFacade(GPGPolicyFacade facade);
|
void setPolicyFacade(GPGPolicyFacade facade);
|
||||||
|
|
||||||
|
FederationRegistryClient getRegistryClient();
|
||||||
|
|
||||||
|
void setRegistryClient(FederationRegistryClient client);
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
package org.apache.hadoop.yarn.server.globalpolicygenerator;
|
package org.apache.hadoop.yarn.server.globalpolicygenerator;
|
||||||
|
|
||||||
|
import org.apache.hadoop.yarn.server.federation.utils.FederationRegistryClient;
|
||||||
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
|
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -27,6 +28,7 @@ public class GPGContextImpl implements GPGContext {
|
|||||||
|
|
||||||
private FederationStateStoreFacade facade;
|
private FederationStateStoreFacade facade;
|
||||||
private GPGPolicyFacade policyFacade;
|
private GPGPolicyFacade policyFacade;
|
||||||
|
private FederationRegistryClient registryClient;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FederationStateStoreFacade getStateStoreFacade() {
|
public FederationStateStoreFacade getStateStoreFacade() {
|
||||||
@ -48,4 +50,14 @@ public GPGPolicyFacade getPolicyFacade(){
|
|||||||
public void setPolicyFacade(GPGPolicyFacade gpgPolicyfacade){
|
public void setPolicyFacade(GPGPolicyFacade gpgPolicyfacade){
|
||||||
policyFacade = gpgPolicyfacade;
|
policyFacade = gpgPolicyfacade;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FederationRegistryClient getRegistryClient() {
|
||||||
|
return registryClient;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setRegistryClient(FederationRegistryClient client) {
|
||||||
|
registryClient = client;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
import org.apache.hadoop.security.HttpCrossOriginFilterInitializer;
|
import org.apache.hadoop.security.HttpCrossOriginFilterInitializer;
|
||||||
import org.apache.hadoop.security.SecurityUtil;
|
import org.apache.hadoop.security.SecurityUtil;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
import org.apache.hadoop.registry.client.api.RegistryOperations;
|
||||||
import org.apache.hadoop.service.CompositeService;
|
import org.apache.hadoop.service.CompositeService;
|
||||||
import org.apache.hadoop.util.GenericOptionsParser;
|
import org.apache.hadoop.util.GenericOptionsParser;
|
||||||
import org.apache.hadoop.util.JvmPauseMonitor;
|
import org.apache.hadoop.util.JvmPauseMonitor;
|
||||||
@ -46,6 +47,7 @@
|
|||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
|
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
|
||||||
|
import org.apache.hadoop.yarn.server.federation.utils.FederationRegistryClient;
|
||||||
import org.apache.hadoop.yarn.server.globalpolicygenerator.applicationcleaner.ApplicationCleaner;
|
import org.apache.hadoop.yarn.server.globalpolicygenerator.applicationcleaner.ApplicationCleaner;
|
||||||
import org.apache.hadoop.yarn.server.globalpolicygenerator.policygenerator.PolicyGenerator;
|
import org.apache.hadoop.yarn.server.globalpolicygenerator.policygenerator.PolicyGenerator;
|
||||||
import org.apache.hadoop.yarn.server.globalpolicygenerator.subclustercleaner.SubClusterCleaner;
|
import org.apache.hadoop.yarn.server.globalpolicygenerator.subclustercleaner.SubClusterCleaner;
|
||||||
@ -81,6 +83,7 @@ public class GlobalPolicyGenerator extends CompositeService {
|
|||||||
|
|
||||||
// Federation Variables
|
// Federation Variables
|
||||||
private GPGContext gpgContext;
|
private GPGContext gpgContext;
|
||||||
|
private RegistryOperations registry;
|
||||||
|
|
||||||
// Scheduler service that runs tasks periodically
|
// Scheduler service that runs tasks periodically
|
||||||
private ScheduledThreadPoolExecutor scheduledExecutorService;
|
private ScheduledThreadPoolExecutor scheduledExecutorService;
|
||||||
@ -123,6 +126,17 @@ protected void serviceInit(Configuration conf) throws Exception {
|
|||||||
new GPGPolicyFacade(this.gpgContext.getStateStoreFacade(), conf);
|
new GPGPolicyFacade(this.gpgContext.getStateStoreFacade(), conf);
|
||||||
this.gpgContext.setPolicyFacade(gpgPolicyFacade);
|
this.gpgContext.setPolicyFacade(gpgPolicyFacade);
|
||||||
|
|
||||||
|
this.registry = FederationStateStoreFacade.createInstance(conf,
|
||||||
|
YarnConfiguration.YARN_REGISTRY_CLASS,
|
||||||
|
YarnConfiguration.DEFAULT_YARN_REGISTRY_CLASS,
|
||||||
|
RegistryOperations.class);
|
||||||
|
this.registry.init(conf);
|
||||||
|
|
||||||
|
UserGroupInformation user = UserGroupInformation.getCurrentUser();
|
||||||
|
FederationRegistryClient registryClient =
|
||||||
|
new FederationRegistryClient(conf, this.registry, user);
|
||||||
|
this.gpgContext.setRegistryClient(registryClient);
|
||||||
|
|
||||||
this.scheduledExecutorService = new ScheduledThreadPoolExecutor(
|
this.scheduledExecutorService = new ScheduledThreadPoolExecutor(
|
||||||
conf.getInt(YarnConfiguration.GPG_SCHEDULED_EXECUTOR_THREADS,
|
conf.getInt(YarnConfiguration.GPG_SCHEDULED_EXECUTOR_THREADS,
|
||||||
YarnConfiguration.DEFAULT_GPG_SCHEDULED_EXECUTOR_THREADS));
|
YarnConfiguration.DEFAULT_GPG_SCHEDULED_EXECUTOR_THREADS));
|
||||||
@ -157,6 +171,8 @@ protected void serviceStart() throws Exception {
|
|||||||
|
|
||||||
super.serviceStart();
|
super.serviceStart();
|
||||||
|
|
||||||
|
this.registry.start();
|
||||||
|
|
||||||
// Schedule SubClusterCleaner service
|
// Schedule SubClusterCleaner service
|
||||||
Configuration config = getConfig();
|
Configuration config = getConfig();
|
||||||
long scCleanerIntervalMs = config.getTimeDuration(
|
long scCleanerIntervalMs = config.getTimeDuration(
|
||||||
@ -214,6 +230,11 @@ protected void serviceStart() throws Exception {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void serviceStop() throws Exception {
|
protected void serviceStop() throws Exception {
|
||||||
|
if (this.registry != null) {
|
||||||
|
this.registry.stop();
|
||||||
|
this.registry = null;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (this.scheduledExecutorService != null
|
if (this.scheduledExecutorService != null
|
||||||
&& !this.scheduledExecutorService.isShutdown()) {
|
&& !this.scheduledExecutorService.isShutdown()) {
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
package org.apache.hadoop.yarn.server.globalpolicygenerator.applicationcleaner;
|
package org.apache.hadoop.yarn.server.globalpolicygenerator.applicationcleaner;
|
||||||
|
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.lang3.time.DurationFormatUtils;
|
import org.apache.commons.lang3.time.DurationFormatUtils;
|
||||||
@ -27,9 +28,11 @@
|
|||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
|
import org.apache.hadoop.yarn.server.federation.utils.FederationRegistryClient;
|
||||||
import org.apache.hadoop.yarn.server.globalpolicygenerator.GPGContext;
|
import org.apache.hadoop.yarn.server.globalpolicygenerator.GPGContext;
|
||||||
import org.apache.hadoop.yarn.server.globalpolicygenerator.GPGUtils;
|
import org.apache.hadoop.yarn.server.globalpolicygenerator.GPGUtils;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.webapp.DeSelectFields;
|
import org.apache.hadoop.yarn.server.resourcemanager.webapp.DeSelectFields;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.webapp.RMWSConsts;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
|
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppsInfo;
|
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppsInfo;
|
||||||
import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
|
import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
|
||||||
@ -46,6 +49,7 @@ public abstract class ApplicationCleaner implements Runnable {
|
|||||||
|
|
||||||
private Configuration conf;
|
private Configuration conf;
|
||||||
private GPGContext gpgContext;
|
private GPGContext gpgContext;
|
||||||
|
private FederationRegistryClient registryClient;
|
||||||
|
|
||||||
private int minRouterSuccessCount;
|
private int minRouterSuccessCount;
|
||||||
private int maxRouterRetry;
|
private int maxRouterRetry;
|
||||||
@ -56,6 +60,7 @@ public void init(Configuration config, GPGContext context)
|
|||||||
|
|
||||||
this.gpgContext = context;
|
this.gpgContext = context;
|
||||||
this.conf = config;
|
this.conf = config;
|
||||||
|
this.registryClient = context.getRegistryClient();
|
||||||
|
|
||||||
String routerSpecString =
|
String routerSpecString =
|
||||||
this.conf.get(YarnConfiguration.GPG_APPCLEANER_CONTACT_ROUTER_SPEC,
|
this.conf.get(YarnConfiguration.GPG_APPCLEANER_CONTACT_ROUTER_SPEC,
|
||||||
@ -80,10 +85,9 @@ public void init(Configuration config, GPGContext context)
|
|||||||
+ this.minRouterSuccessCount + " should be positive");
|
+ this.minRouterSuccessCount + " should be positive");
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info(
|
LOG.info("Initialized AppCleaner with Router query with min success {}, " +
|
||||||
"Initialized AppCleaner with Router query with min success {}, "
|
"max retry {}, retry interval {}.", this.minRouterSuccessCount,
|
||||||
+ "max retry {}, retry interval {}",
|
this.maxRouterRetry,
|
||||||
this.minRouterSuccessCount, this.maxRouterRetry,
|
|
||||||
DurationFormatUtils.formatDurationISO(this.routerQueryIntevalMillis));
|
DurationFormatUtils.formatDurationISO(this.routerQueryIntevalMillis));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -100,9 +104,9 @@ public GPGContext getGPGContext() {
|
|||||||
public Set<ApplicationId> getAppsFromRouter() throws YarnRuntimeException {
|
public Set<ApplicationId> getAppsFromRouter() throws YarnRuntimeException {
|
||||||
String webAppAddress = WebAppUtils.getRouterWebAppURLWithScheme(conf);
|
String webAppAddress = WebAppUtils.getRouterWebAppURLWithScheme(conf);
|
||||||
|
|
||||||
LOG.info(String.format("Contacting router at: %s", webAppAddress));
|
LOG.info("Contacting router at: {}.", webAppAddress);
|
||||||
AppsInfo appsInfo = GPGUtils.invokeRMWebService(webAppAddress, "apps", AppsInfo.class, conf,
|
AppsInfo appsInfo = GPGUtils.invokeRMWebService(webAppAddress, RMWSConsts.APPS,
|
||||||
DeSelectFields.DeSelectType.RESOURCE_REQUESTS.toString());
|
AppsInfo.class, conf, DeSelectFields.DeSelectType.RESOURCE_REQUESTS.toString());
|
||||||
|
|
||||||
Set<ApplicationId> appSet = new HashSet<>();
|
Set<ApplicationId> appSet = new HashSet<>();
|
||||||
for (AppInfo appInfo : appsInfo.getApps()) {
|
for (AppInfo appInfo : appsInfo.getApps()) {
|
||||||
@ -148,6 +152,18 @@ public Set<ApplicationId> getRouterKnownApplications() throws YarnException {
|
|||||||
+ " success Router queries after " + totalAttemptCount + " retries");
|
+ " success Router queries after " + totalAttemptCount + " retries");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected void cleanupAppRecordInRegistry(Set<ApplicationId> knownApps) {
|
||||||
|
List<String> allApps = this.registryClient.getAllApplications();
|
||||||
|
LOG.info("Got {} existing apps in registry.", allApps.size());
|
||||||
|
for (String app : allApps) {
|
||||||
|
ApplicationId appId = ApplicationId.fromString(app);
|
||||||
|
if (!knownApps.contains(appId)) {
|
||||||
|
LOG.info("removing finished application entry for {}", app);
|
||||||
|
this.registryClient.removeAppFromRegistry(appId, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public abstract void run();
|
public abstract void run();
|
||||||
}
|
}
|
||||||
|
@ -70,6 +70,8 @@ public void run() {
|
|||||||
LOG.error("deleteApplicationHomeSubCluster failed at application {}.", appId, e);
|
LOG.error("deleteApplicationHomeSubCluster failed at application {}.", appId, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Clean up registry entries
|
||||||
|
cleanupAppRecordInRegistry(routerApps);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
LOG.error("Application cleaner started at time {} fails. ", now, e);
|
LOG.error("Application cleaner started at time {} fails. ", now, e);
|
||||||
}
|
}
|
||||||
|
@ -24,15 +24,21 @@
|
|||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.registry.client.api.RegistryOperations;
|
||||||
|
import org.apache.hadoop.registry.client.impl.FSRegistryOperationsService;
|
||||||
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
import org.apache.hadoop.security.token.Token;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
|
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
||||||
import org.apache.hadoop.yarn.server.federation.store.impl.MemoryFederationStateStore;
|
import org.apache.hadoop.yarn.server.federation.store.impl.MemoryFederationStateStore;
|
||||||
import org.apache.hadoop.yarn.server.federation.store.records.AddApplicationHomeSubClusterRequest;
|
import org.apache.hadoop.yarn.server.federation.store.records.AddApplicationHomeSubClusterRequest;
|
||||||
import org.apache.hadoop.yarn.server.federation.store.records.ApplicationHomeSubCluster;
|
import org.apache.hadoop.yarn.server.federation.store.records.ApplicationHomeSubCluster;
|
||||||
import org.apache.hadoop.yarn.server.federation.store.records.GetApplicationsHomeSubClusterRequest;
|
import org.apache.hadoop.yarn.server.federation.store.records.GetApplicationsHomeSubClusterRequest;
|
||||||
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
|
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
|
||||||
|
import org.apache.hadoop.yarn.server.federation.utils.FederationRegistryClient;
|
||||||
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
|
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
|
||||||
import org.apache.hadoop.yarn.server.globalpolicygenerator.GPGContext;
|
import org.apache.hadoop.yarn.server.globalpolicygenerator.GPGContext;
|
||||||
import org.apache.hadoop.yarn.server.globalpolicygenerator.GPGContextImpl;
|
import org.apache.hadoop.yarn.server.globalpolicygenerator.GPGContextImpl;
|
||||||
@ -50,6 +56,8 @@ public class TestDefaultApplicationCleaner {
|
|||||||
private FederationStateStoreFacade facade;
|
private FederationStateStoreFacade facade;
|
||||||
private ApplicationCleaner appCleaner;
|
private ApplicationCleaner appCleaner;
|
||||||
private GPGContext gpgContext;
|
private GPGContext gpgContext;
|
||||||
|
private RegistryOperations registry;
|
||||||
|
private FederationRegistryClient registryClient;
|
||||||
|
|
||||||
private List<ApplicationId> appIds;
|
private List<ApplicationId> appIds;
|
||||||
// The list of applications returned by mocked router
|
// The list of applications returned by mocked router
|
||||||
@ -68,8 +76,18 @@ public void setup() throws Exception {
|
|||||||
facade = FederationStateStoreFacade.getInstance();
|
facade = FederationStateStoreFacade.getInstance();
|
||||||
facade.reinitialize(stateStore, conf);
|
facade.reinitialize(stateStore, conf);
|
||||||
|
|
||||||
|
registry = new FSRegistryOperationsService();
|
||||||
|
registry.init(conf);
|
||||||
|
registry.start();
|
||||||
|
|
||||||
|
UserGroupInformation user = UserGroupInformation.getCurrentUser();
|
||||||
|
registryClient = new FederationRegistryClient(conf, registry, user);
|
||||||
|
registryClient.cleanAllApplications();
|
||||||
|
Assert.assertEquals(0, registryClient.getAllApplications().size());
|
||||||
|
|
||||||
gpgContext = new GPGContextImpl();
|
gpgContext = new GPGContextImpl();
|
||||||
gpgContext.setStateStoreFacade(facade);
|
gpgContext.setStateStoreFacade(facade);
|
||||||
|
gpgContext.setRegistryClient(registryClient);
|
||||||
|
|
||||||
appCleaner = new TestableDefaultApplicationCleaner();
|
appCleaner = new TestableDefaultApplicationCleaner();
|
||||||
appCleaner.init(conf, gpgContext);
|
appCleaner.init(conf, gpgContext);
|
||||||
@ -87,7 +105,12 @@ public void setup() throws Exception {
|
|||||||
stateStore.addApplicationHomeSubCluster(
|
stateStore.addApplicationHomeSubCluster(
|
||||||
AddApplicationHomeSubClusterRequest.newInstance(
|
AddApplicationHomeSubClusterRequest.newInstance(
|
||||||
ApplicationHomeSubCluster.newInstance(appId, subClusterId)));
|
ApplicationHomeSubCluster.newInstance(appId, subClusterId)));
|
||||||
|
|
||||||
|
// Write some registry entries for the app
|
||||||
|
registryClient.writeAMRMTokenForUAM(appId, subClusterId.toString(),
|
||||||
|
new Token<AMRMTokenIdentifier>());
|
||||||
}
|
}
|
||||||
|
Assert.assertEquals(3, registryClient.getAllApplications().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
@After
|
@After
|
||||||
@ -96,6 +119,14 @@ public void breakDown() {
|
|||||||
stateStore.close();
|
stateStore.close();
|
||||||
stateStore = null;
|
stateStore = null;
|
||||||
}
|
}
|
||||||
|
if (registryClient != null) {
|
||||||
|
registryClient.cleanAllApplications();
|
||||||
|
registryClient = null;
|
||||||
|
}
|
||||||
|
if (registry != null) {
|
||||||
|
registry.stop();
|
||||||
|
registry = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -116,6 +147,9 @@ public void testFederationStateStoreAppsCleanUp() throws YarnException {
|
|||||||
.getApplicationsHomeSubCluster(
|
.getApplicationsHomeSubCluster(
|
||||||
GetApplicationsHomeSubClusterRequest.newInstance())
|
GetApplicationsHomeSubClusterRequest.newInstance())
|
||||||
.getAppsHomeSubClusters().size());
|
.getAppsHomeSubClusters().size());
|
||||||
|
|
||||||
|
// The known app should not be cleaned in registry
|
||||||
|
Assert.assertEquals(1, registryClient.getAllApplications().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
Reference in New Issue
Block a user