YARN-9048. Add znode hierarchy in Federation ZK State Store. (#6016)
This commit is contained in:
parent
594e9f29f5
commit
b00d605832
@ -26,6 +26,8 @@ import java.nio.ByteBuffer;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Calendar;
|
import java.util.Calendar;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.TimeZone;
|
import java.util.TimeZone;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
@ -111,6 +113,7 @@ import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl;
|
|||||||
import org.apache.hadoop.yarn.util.Clock;
|
import org.apache.hadoop.yarn.util.Clock;
|
||||||
import org.apache.hadoop.yarn.util.Records;
|
import org.apache.hadoop.yarn.util.Records;
|
||||||
import org.apache.hadoop.yarn.util.SystemClock;
|
import org.apache.hadoop.yarn.util.SystemClock;
|
||||||
|
import org.apache.zookeeper.KeeperException;
|
||||||
import org.apache.zookeeper.data.ACL;
|
import org.apache.zookeeper.data.ACL;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@ -131,8 +134,16 @@ import static org.apache.hadoop.util.curator.ZKCuratorManager.getNodePath;
|
|||||||
* | |----- SC1
|
* | |----- SC1
|
||||||
* | |----- SC2
|
* | |----- SC2
|
||||||
* |--- APPLICATION
|
* |--- APPLICATION
|
||||||
* | |----- APP1
|
* | |----- HIERARCHIES
|
||||||
* | |----- APP2
|
* | | |----- 1
|
||||||
|
* | | | |----- (#ApplicationId barring last character)
|
||||||
|
* | | | | | |----- APP Data
|
||||||
|
* | | | ....
|
||||||
|
* | | |
|
||||||
|
* | | |----- 2
|
||||||
|
* | | | |----- (#ApplicationId barring last 2 characters)
|
||||||
|
* | | | | |----- (#Last 2 characters of ApplicationId)
|
||||||
|
* | | | | | |----- APP Data
|
||||||
* |--- POLICY
|
* |--- POLICY
|
||||||
* | |----- QUEUE1
|
* | |----- QUEUE1
|
||||||
* | |----- QUEUE1
|
* | |----- QUEUE1
|
||||||
@ -194,12 +205,19 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
|
|||||||
private int maxAppsInStateStore;
|
private int maxAppsInStateStore;
|
||||||
|
|
||||||
/** Directory to store the delegation token data. **/
|
/** Directory to store the delegation token data. **/
|
||||||
|
private Map<Integer, String> routerAppRootHierarchies;
|
||||||
private String routerRMDTSecretManagerRoot;
|
private String routerRMDTSecretManagerRoot;
|
||||||
private String routerRMDTMasterKeysRootPath;
|
private String routerRMDTMasterKeysRootPath;
|
||||||
private String routerRMDelegationTokensRootPath;
|
private String routerRMDelegationTokensRootPath;
|
||||||
private String routerRMSequenceNumberPath;
|
private String routerRMSequenceNumberPath;
|
||||||
private String routerRMMasterKeyIdPath;
|
private String routerRMMasterKeyIdPath;
|
||||||
|
|
||||||
|
private int appIdNodeSplitIndex = 0;
|
||||||
|
private final static int HIERARCHIES_LEVEL = 4;
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public static final String ROUTER_APP_ROOT_HIERARCHIES = "HIERARCHIES";
|
||||||
|
|
||||||
private volatile Clock clock = SystemClock.getInstance();
|
private volatile Clock clock = SystemClock.getInstance();
|
||||||
|
|
||||||
protected static final Version CURRENT_VERSION_INFO = Version.newInstance(1, 1);
|
protected static final Version CURRENT_VERSION_INFO = Version.newInstance(1, 1);
|
||||||
@ -208,6 +226,27 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
|
|||||||
private ZKFederationStateStoreOpDurations opDurations =
|
private ZKFederationStateStoreOpDurations opDurations =
|
||||||
ZKFederationStateStoreOpDurations.getInstance();
|
ZKFederationStateStoreOpDurations.getInstance();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Indicates different app attempt state store operations.
|
||||||
|
*/
|
||||||
|
private enum AppAttemptOp {
|
||||||
|
STORE,
|
||||||
|
UPDATE,
|
||||||
|
REMOVE
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encapsulates full app node path and corresponding split index.
|
||||||
|
*/
|
||||||
|
private final static class AppNodeSplitInfo {
|
||||||
|
private final String path;
|
||||||
|
private final int splitIndex;
|
||||||
|
AppNodeSplitInfo(String path, int splitIndex) {
|
||||||
|
this.path = path;
|
||||||
|
this.splitIndex = splitIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void init(Configuration conf) throws YarnException {
|
public void init(Configuration conf) throws YarnException {
|
||||||
|
|
||||||
@ -234,6 +273,23 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
|
|||||||
reservationsZNode = getNodePath(baseZNode, ROOT_ZNODE_NAME_RESERVATION);
|
reservationsZNode = getNodePath(baseZNode, ROOT_ZNODE_NAME_RESERVATION);
|
||||||
versionNode = getNodePath(baseZNode, ROOT_ZNODE_NAME_VERSION);
|
versionNode = getNodePath(baseZNode, ROOT_ZNODE_NAME_VERSION);
|
||||||
|
|
||||||
|
String hierarchiesPath = getNodePath(appsZNode, ROUTER_APP_ROOT_HIERARCHIES);
|
||||||
|
routerAppRootHierarchies = new HashMap<>();
|
||||||
|
routerAppRootHierarchies.put(0, appsZNode);
|
||||||
|
for (int splitIndex = 1; splitIndex <= HIERARCHIES_LEVEL; splitIndex++) {
|
||||||
|
routerAppRootHierarchies.put(splitIndex,
|
||||||
|
getNodePath(hierarchiesPath, Integer.toString(splitIndex)));
|
||||||
|
}
|
||||||
|
|
||||||
|
appIdNodeSplitIndex = conf.getInt(YarnConfiguration.ZK_APPID_NODE_SPLIT_INDEX,
|
||||||
|
YarnConfiguration.DEFAULT_ZK_APPID_NODE_SPLIT_INDEX);
|
||||||
|
if (appIdNodeSplitIndex < 1 || appIdNodeSplitIndex > HIERARCHIES_LEVEL) {
|
||||||
|
LOG.info("Invalid value {} for config {} specified. Resetting it to {}",
|
||||||
|
appIdNodeSplitIndex, YarnConfiguration.ZK_APPID_NODE_SPLIT_INDEX,
|
||||||
|
YarnConfiguration.DEFAULT_ZK_APPID_NODE_SPLIT_INDEX);
|
||||||
|
appIdNodeSplitIndex = YarnConfiguration.DEFAULT_ZK_APPID_NODE_SPLIT_INDEX;
|
||||||
|
}
|
||||||
|
|
||||||
// delegation token znodes
|
// delegation token znodes
|
||||||
routerRMDTSecretManagerRoot = getNodePath(baseZNode, ROUTER_RM_DT_SECRET_MANAGER_ROOT);
|
routerRMDTSecretManagerRoot = getNodePath(baseZNode, ROUTER_RM_DT_SECRET_MANAGER_ROOT);
|
||||||
routerRMDTMasterKeysRootPath = getNodePath(routerRMDTSecretManagerRoot,
|
routerRMDTMasterKeysRootPath = getNodePath(routerRMDTSecretManagerRoot,
|
||||||
@ -250,6 +306,12 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
|
|||||||
List<ACL> zkAcl = ZKCuratorManager.getZKAcls(conf);
|
List<ACL> zkAcl = ZKCuratorManager.getZKAcls(conf);
|
||||||
zkManager.createRootDirRecursively(membershipZNode, zkAcl);
|
zkManager.createRootDirRecursively(membershipZNode, zkAcl);
|
||||||
zkManager.createRootDirRecursively(appsZNode, zkAcl);
|
zkManager.createRootDirRecursively(appsZNode, zkAcl);
|
||||||
|
zkManager.createRootDirRecursively(
|
||||||
|
getNodePath(appsZNode, ROUTER_APP_ROOT_HIERARCHIES));
|
||||||
|
for (int splitIndex = 1; splitIndex <= HIERARCHIES_LEVEL; splitIndex++) {
|
||||||
|
zkManager.createRootDirRecursively(
|
||||||
|
routerAppRootHierarchies.get(splitIndex));
|
||||||
|
}
|
||||||
zkManager.createRootDirRecursively(policiesZNode, zkAcl);
|
zkManager.createRootDirRecursively(policiesZNode, zkAcl);
|
||||||
zkManager.createRootDirRecursively(reservationsZNode, zkAcl);
|
zkManager.createRootDirRecursively(reservationsZNode, zkAcl);
|
||||||
zkManager.createRootDirRecursively(routerRMDTSecretManagerRoot, zkAcl);
|
zkManager.createRootDirRecursively(routerRMDTSecretManagerRoot, zkAcl);
|
||||||
@ -320,6 +382,21 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Register the home {@code SubClusterId} of the newly submitted
|
||||||
|
* {@code ApplicationId}. Currently response is empty if the operation was
|
||||||
|
* successful, if not an exception reporting reason for a failure. If a
|
||||||
|
* mapping for the application already existed, the {@code SubClusterId} in
|
||||||
|
* this response will return the existing mapping which might be different
|
||||||
|
* from that in the {@code AddApplicationHomeSubClusterRequest}.
|
||||||
|
*
|
||||||
|
* @param request the request to register a new application with its home sub-cluster.
|
||||||
|
* @return upon successful registration of the application in the StateStore,
|
||||||
|
* {@code AddApplicationHomeSubClusterRequest} containing the home
|
||||||
|
* sub-cluster of the application. Otherwise, an exception reporting
|
||||||
|
* reason for a failure.
|
||||||
|
* @throws YarnException indicates exceptions from yarn servers.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public AddApplicationHomeSubClusterResponse addApplicationHomeSubCluster(
|
public AddApplicationHomeSubClusterResponse addApplicationHomeSubCluster(
|
||||||
AddApplicationHomeSubClusterRequest request) throws YarnException {
|
AddApplicationHomeSubClusterRequest request) throws YarnException {
|
||||||
@ -367,6 +444,17 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
|
|||||||
throw new YarnException("Cannot addApplicationHomeSubCluster by request");
|
throw new YarnException("Cannot addApplicationHomeSubCluster by request");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update the home {@code SubClusterId} of a previously submitted
|
||||||
|
* {@code ApplicationId}. Currently response is empty if the operation was
|
||||||
|
* successful, if not an exception reporting reason for a failure.
|
||||||
|
*
|
||||||
|
* @param request the request to update the home sub-cluster of an
|
||||||
|
* application.
|
||||||
|
* @return empty on successful update of the application in the StateStore, if
|
||||||
|
* not an exception reporting reason for a failure
|
||||||
|
* @throws YarnException indicates exceptions from yarn servers.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public UpdateApplicationHomeSubClusterResponse updateApplicationHomeSubCluster(
|
public UpdateApplicationHomeSubClusterResponse updateApplicationHomeSubCluster(
|
||||||
UpdateApplicationHomeSubClusterRequest request) throws YarnException {
|
UpdateApplicationHomeSubClusterRequest request) throws YarnException {
|
||||||
@ -402,6 +490,15 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
|
|||||||
return UpdateApplicationHomeSubClusterResponse.newInstance();
|
return UpdateApplicationHomeSubClusterResponse.newInstance();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get information about the application identified by the input
|
||||||
|
* {@code ApplicationId}.
|
||||||
|
*
|
||||||
|
* @param request contains the application queried
|
||||||
|
* @return {@code ApplicationHomeSubCluster} containing the application's home
|
||||||
|
* subcluster
|
||||||
|
* @throws YarnException indicates exceptions from yarn servers.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public GetApplicationHomeSubClusterResponse getApplicationHomeSubCluster(
|
public GetApplicationHomeSubClusterResponse getApplicationHomeSubCluster(
|
||||||
GetApplicationHomeSubClusterRequest request) throws YarnException {
|
GetApplicationHomeSubClusterRequest request) throws YarnException {
|
||||||
@ -437,6 +534,14 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
|
|||||||
subClusterId, createTime);
|
subClusterId, createTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the {@code ApplicationHomeSubCluster} list representing the mapping of
|
||||||
|
* all submitted applications to it's home sub-cluster.
|
||||||
|
*
|
||||||
|
* @param request empty representing all applications
|
||||||
|
* @return the mapping of all submitted application to it's home sub-cluster
|
||||||
|
* @throws YarnException indicates exceptions from yarn servers.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public GetApplicationsHomeSubClusterResponse getApplicationsHomeSubCluster(
|
public GetApplicationsHomeSubClusterResponse getApplicationsHomeSubCluster(
|
||||||
GetApplicationsHomeSubClusterRequest request) throws YarnException {
|
GetApplicationsHomeSubClusterRequest request) throws YarnException {
|
||||||
@ -448,9 +553,7 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
|
|||||||
try {
|
try {
|
||||||
long start = clock.getTime();
|
long start = clock.getTime();
|
||||||
SubClusterId requestSC = request.getSubClusterId();
|
SubClusterId requestSC = request.getSubClusterId();
|
||||||
List<String> children = zkManager.getChildren(appsZNode);
|
List<ApplicationHomeSubCluster> result = loadRouterApplications().stream()
|
||||||
List<ApplicationHomeSubCluster> result = children.stream()
|
|
||||||
.map(child -> generateAppHomeSC(child))
|
|
||||||
.sorted(Comparator.comparing(ApplicationHomeSubCluster::getCreateTime).reversed())
|
.sorted(Comparator.comparing(ApplicationHomeSubCluster::getCreateTime).reversed())
|
||||||
.filter(appHomeSC -> filterHomeSubCluster(requestSC, appHomeSC.getHomeSubCluster()))
|
.filter(appHomeSC -> filterHomeSubCluster(requestSC, appHomeSC.getHomeSubCluster()))
|
||||||
.limit(maxAppsInStateStore)
|
.limit(maxAppsInStateStore)
|
||||||
@ -467,48 +570,51 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
|
|||||||
throw new YarnException("Cannot get app by request");
|
throw new YarnException("Cannot get app by request");
|
||||||
}
|
}
|
||||||
|
|
||||||
private ApplicationHomeSubCluster generateAppHomeSC(String appId) {
|
/**
|
||||||
try {
|
* Delete the mapping of home {@code SubClusterId} of a previously submitted
|
||||||
// Parse ApplicationHomeSubCluster
|
* {@code ApplicationId}. Currently response is empty if the operation was
|
||||||
ApplicationId applicationId = ApplicationId.fromString(appId);
|
* successful, if not an exception reporting reason for a failure.
|
||||||
ApplicationHomeSubCluster zkStoreApplicationHomeSubCluster =
|
*
|
||||||
getApplicationHomeSubCluster(applicationId);
|
* @param request the request to delete the home sub-cluster of an
|
||||||
|
* application.
|
||||||
// Prepare to return data
|
* @return empty on successful update of the application in the StateStore, if
|
||||||
SubClusterId subClusterId = zkStoreApplicationHomeSubCluster.getHomeSubCluster();
|
* not an exception reporting reason for a failure
|
||||||
ApplicationHomeSubCluster resultApplicationHomeSubCluster =
|
* @throws YarnException if the request is invalid/fails
|
||||||
ApplicationHomeSubCluster.newInstance(applicationId, subClusterId);
|
*/
|
||||||
return resultApplicationHomeSubCluster;
|
|
||||||
} catch (Exception ex) {
|
|
||||||
LOG.error("get homeSubCluster by appId = {}.", appId, ex);
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DeleteApplicationHomeSubClusterResponse
|
public DeleteApplicationHomeSubClusterResponse deleteApplicationHomeSubCluster(
|
||||||
deleteApplicationHomeSubCluster(
|
DeleteApplicationHomeSubClusterRequest request) throws YarnException {
|
||||||
DeleteApplicationHomeSubClusterRequest request)
|
|
||||||
throws YarnException {
|
|
||||||
long start = clock.getTime();
|
long start = clock.getTime();
|
||||||
FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
|
FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
|
||||||
ApplicationId appId = request.getApplicationId();
|
ApplicationId appId = request.getApplicationId();
|
||||||
String appZNode = getNodePath(appsZNode, appId.toString());
|
String appIdRemovePath = getLeafAppIdNodePath(appId.toString(), false);
|
||||||
|
int splitIndex = appIdNodeSplitIndex;
|
||||||
|
|
||||||
boolean exists = false;
|
boolean exists = true;
|
||||||
try {
|
try {
|
||||||
exists = zkManager.exists(appZNode);
|
if (!exists(appIdRemovePath)) {
|
||||||
|
AppNodeSplitInfo alternatePathInfo = getAlternatePath(appId.toString());
|
||||||
|
if (alternatePathInfo != null) {
|
||||||
|
appIdRemovePath = alternatePathInfo.path;
|
||||||
|
splitIndex = alternatePathInfo.splitIndex;
|
||||||
|
} else {
|
||||||
|
exists = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
String errMsg = "Cannot check app: " + e.getMessage();
|
String errMsg = "Cannot check app: " + e.getMessage();
|
||||||
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
|
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!exists) {
|
if (!exists) {
|
||||||
String errMsg = "Application " + appId + " does not exist";
|
String errMsg = "Application " + appId + " does not exist";
|
||||||
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
|
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
zkManager.delete(appZNode);
|
zkManager.delete(appIdRemovePath);
|
||||||
|
// Check if we should remove the parent app node as well.
|
||||||
|
checkRemoveParentAppNode(appIdRemovePath, splitIndex);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
String errMsg = "Cannot delete app: " + e.getMessage();
|
String errMsg = "Cannot delete app: " + e.getMessage();
|
||||||
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
|
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
|
||||||
@ -730,11 +836,12 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
|
|||||||
*
|
*
|
||||||
* @param appId Application identifier.
|
* @param appId Application identifier.
|
||||||
* @return ApplicationHomeSubCluster identifier.
|
* @return ApplicationHomeSubCluster identifier.
|
||||||
* @throws Exception If it cannot contact ZooKeeper.
|
* @throws YarnException If it cannot contact ZooKeeper.
|
||||||
*/
|
*/
|
||||||
private ApplicationHomeSubCluster getApplicationHomeSubCluster(
|
private ApplicationHomeSubCluster getApplicationHomeSubCluster(
|
||||||
final ApplicationId appId) throws YarnException {
|
final ApplicationId appId) throws YarnException{
|
||||||
String appZNode = getNodePath(appsZNode, appId.toString());
|
|
||||||
|
String appZNode = getLeafAppIdNodePath(appId.toString(), false);
|
||||||
|
|
||||||
ApplicationHomeSubCluster appHomeSubCluster = null;
|
ApplicationHomeSubCluster appHomeSubCluster = null;
|
||||||
byte[] data = get(appZNode);
|
byte[] data = get(appZNode);
|
||||||
@ -761,11 +868,44 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
|
|||||||
private void storeOrUpdateApplicationHomeSubCluster(final ApplicationId applicationId,
|
private void storeOrUpdateApplicationHomeSubCluster(final ApplicationId applicationId,
|
||||||
final ApplicationHomeSubCluster applicationHomeSubCluster, boolean update)
|
final ApplicationHomeSubCluster applicationHomeSubCluster, boolean update)
|
||||||
throws YarnException {
|
throws YarnException {
|
||||||
String appZNode = getNodePath(appsZNode, applicationId.toString());
|
try {
|
||||||
ApplicationHomeSubClusterProto proto =
|
ApplicationHomeSubClusterProto proto =
|
||||||
((ApplicationHomeSubClusterPBImpl) applicationHomeSubCluster).getProto();
|
((ApplicationHomeSubClusterPBImpl) applicationHomeSubCluster).getProto();
|
||||||
byte[] data = proto.toByteArray();
|
byte[] data = proto.toByteArray();
|
||||||
put(appZNode, data, update);
|
if (update) {
|
||||||
|
updateApplicationStateInternal(applicationId, data);
|
||||||
|
} else {
|
||||||
|
storeApplicationStateInternal(applicationId, data);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new YarnException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void storeApplicationStateInternal(final ApplicationId applicationId, byte[] data)
|
||||||
|
throws Exception {
|
||||||
|
String nodeCreatePath = getLeafAppIdNodePath(applicationId.toString(), true);
|
||||||
|
LOG.debug("Storing info for app: {} at: {}.", applicationId, nodeCreatePath);
|
||||||
|
put(nodeCreatePath, data, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void updateApplicationStateInternal(final ApplicationId applicationId, byte[] data)
|
||||||
|
throws Exception {
|
||||||
|
String nodeUpdatePath = getLeafAppIdNodePath(applicationId.toString(), false);
|
||||||
|
if (!exists(nodeUpdatePath)) {
|
||||||
|
AppNodeSplitInfo alternatePathInfo = getAlternatePath(applicationId.toString());
|
||||||
|
if (alternatePathInfo != null) {
|
||||||
|
nodeUpdatePath = alternatePathInfo.path;
|
||||||
|
} else if (appIdNodeSplitIndex != 0) {
|
||||||
|
// No alternate path exists. Create path as per configured split index.
|
||||||
|
String rootNode = getSplitAppNodeParent(nodeUpdatePath, appIdNodeSplitIndex);
|
||||||
|
if (!exists(rootNode)) {
|
||||||
|
zkManager.create(rootNode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOG.debug("Storing final state info for app: {} at: {}.", applicationId, nodeUpdatePath);
|
||||||
|
put(nodeUpdatePath, data, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1674,4 +1814,198 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
|
|||||||
}
|
}
|
||||||
return keyIdSeqCounter.getCount();
|
return keyIdSeqCounter.getCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get parent app node path based on full path and split index supplied.
|
||||||
|
* @param appIdPath App id path for which parent needs to be returned.
|
||||||
|
* @param splitIndex split index.
|
||||||
|
* @return parent app node path.
|
||||||
|
*/
|
||||||
|
private String getSplitAppNodeParent(String appIdPath, int splitIndex) {
|
||||||
|
// Calculated as string upto index (appIdPath Length - split index - 1). We
|
||||||
|
// deduct 1 to exclude path separator.
|
||||||
|
return appIdPath.substring(0, appIdPath.length() - splitIndex - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if parent app node has no leaf nodes and if it does not have,
|
||||||
|
* removes it. Called while removing application.
|
||||||
|
*
|
||||||
|
* @param appIdPath path of app id to be removed.
|
||||||
|
* @param splitIndex split index.
|
||||||
|
* @throws Exception if any problem occurs while performing ZK operation.
|
||||||
|
*/
|
||||||
|
private void checkRemoveParentAppNode(String appIdPath, int splitIndex)
|
||||||
|
throws Exception {
|
||||||
|
if (splitIndex == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String parentAppNode = getSplitAppNodeParent(appIdPath, splitIndex);
|
||||||
|
List<String> children;
|
||||||
|
try {
|
||||||
|
children = getChildren(parentAppNode);
|
||||||
|
} catch (KeeperException.NoNodeException ke) {
|
||||||
|
// It should be fine to swallow this exception as the parent app node we
|
||||||
|
// intend to delete is already deleted.
|
||||||
|
LOG.debug("Unable to remove app parent node {} as it does not exist.",
|
||||||
|
parentAppNode);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If children==null or children is not empty, we cannot delete the parent path.
|
||||||
|
if (children == null || !children.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// No apps stored under parent path.
|
||||||
|
try {
|
||||||
|
zkManager.delete(parentAppNode);
|
||||||
|
LOG.debug("No leaf app node exists. Removing parent node {}.", parentAppNode);
|
||||||
|
} catch (KeeperException.NotEmptyException ke) {
|
||||||
|
// It should be fine to swallow this exception as the parent app node
|
||||||
|
// has to be deleted only if it has no children. And this node has.
|
||||||
|
LOG.debug("Unable to remove app parent node {} as it has children.",
|
||||||
|
parentAppNode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> getChildren(final String path) throws Exception {
|
||||||
|
return zkManager.getChildren(path);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get alternate path for app id if path according to configured split index
|
||||||
|
* does not exist. We look for path based on all possible split indices.
|
||||||
|
* @param appId
|
||||||
|
* @return a {@link AppNodeSplitInfo} object containing the path and split
|
||||||
|
* index if it exists, null otherwise.
|
||||||
|
* @throws Exception if any problem occurs while performing ZK operation.
|
||||||
|
*/
|
||||||
|
private AppNodeSplitInfo getAlternatePath(String appId) throws Exception {
|
||||||
|
for (Map.Entry<Integer, String> entry : routerAppRootHierarchies.entrySet()) {
|
||||||
|
// Look for other paths
|
||||||
|
int splitIndex = entry.getKey();
|
||||||
|
if (splitIndex != appIdNodeSplitIndex) {
|
||||||
|
String alternatePath =
|
||||||
|
getLeafAppIdNodePath(appId, entry.getValue(), splitIndex, false);
|
||||||
|
if (exists(alternatePath)) {
|
||||||
|
return new AppNodeSplitInfo(alternatePath, splitIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns leaf app node path based on app id and passed split index. If the
|
||||||
|
* passed flag createParentIfNotExists is true, also creates the parent app
|
||||||
|
* node if it does not exist.
|
||||||
|
* @param appId application id.
|
||||||
|
* @param rootNode app root node based on split index.
|
||||||
|
* @param appIdNodeSplitIdx split index.
|
||||||
|
* @param createParentIfNotExists flag which determines if parent app node
|
||||||
|
* needs to be created(as per split) if it does not exist.
|
||||||
|
* @return leaf app node path.
|
||||||
|
* @throws Exception if any problem occurs while performing ZK operation.
|
||||||
|
*/
|
||||||
|
private String getLeafAppIdNodePath(String appId, String rootNode,
|
||||||
|
int appIdNodeSplitIdx, boolean createParentIfNotExists) throws Exception {
|
||||||
|
if (appIdNodeSplitIdx == 0) {
|
||||||
|
return getNodePath(rootNode, appId);
|
||||||
|
}
|
||||||
|
String nodeName = appId;
|
||||||
|
int splitIdx = nodeName.length() - appIdNodeSplitIdx;
|
||||||
|
String rootNodePath = getNodePath(rootNode, nodeName.substring(0, splitIdx));
|
||||||
|
if (createParentIfNotExists && !exists(rootNodePath)) {
|
||||||
|
try {
|
||||||
|
zkManager.create(rootNodePath);
|
||||||
|
} catch (KeeperException.NodeExistsException e) {
|
||||||
|
LOG.debug("Unable to create app parent node {} as it already exists.", rootNodePath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return getNodePath(rootNodePath, nodeName.substring(splitIdx));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns leaf app node path based on app id and configured split index. If
|
||||||
|
* the passed flag createParentIfNotExists is true, also creates the parent
|
||||||
|
* app node if it does not exist.
|
||||||
|
* @param appId application id.
|
||||||
|
* @param createParentIfNotExists flag which determines if parent app node
|
||||||
|
* needs to be created(as per split) if it does not exist.
|
||||||
|
* @return leaf app node path.
|
||||||
|
* @throws YarnException if any problem occurs while performing ZK operation.
|
||||||
|
*/
|
||||||
|
private String getLeafAppIdNodePath(String appId,
|
||||||
|
boolean createParentIfNotExists) throws YarnException {
|
||||||
|
try {
|
||||||
|
String rootNode = routerAppRootHierarchies.get(appIdNodeSplitIndex);
|
||||||
|
return getLeafAppIdNodePath(appId, rootNode, appIdNodeSplitIndex, createParentIfNotExists);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new YarnException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private ApplicationHomeSubCluster loadRouterAppStateFromAppNode(String appNodePath)
|
||||||
|
throws Exception {
|
||||||
|
byte[] data = get(appNodePath);
|
||||||
|
LOG.debug("Loading application from znode: {}", appNodePath);
|
||||||
|
ApplicationHomeSubCluster appHomeSubCluster = null;
|
||||||
|
|
||||||
|
if (data == null) {
|
||||||
|
return appHomeSubCluster;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
appHomeSubCluster = new ApplicationHomeSubClusterPBImpl(
|
||||||
|
ApplicationHomeSubClusterProto.parseFrom(data));
|
||||||
|
} catch (InvalidProtocolBufferException e) {
|
||||||
|
String errMsg = "Cannot parse application at " + appNodePath;
|
||||||
|
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
|
||||||
|
}
|
||||||
|
|
||||||
|
return appHomeSubCluster;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<ApplicationHomeSubCluster> loadRouterApplications() throws Exception {
|
||||||
|
List<ApplicationHomeSubCluster> applicationHomeSubClusters = new ArrayList<>();
|
||||||
|
for (int splitIndex = 0; splitIndex <= 4; splitIndex++) {
|
||||||
|
String appRoot = routerAppRootHierarchies.get(splitIndex);
|
||||||
|
if (appRoot == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
List<String> childNodes = getChildren(appRoot);
|
||||||
|
boolean appNodeFound = false;
|
||||||
|
for (String childNodeName : childNodes) {
|
||||||
|
if (childNodeName.startsWith(ApplicationId.appIdStrPrefix)) {
|
||||||
|
appNodeFound = true;
|
||||||
|
if (splitIndex == 0) {
|
||||||
|
ApplicationHomeSubCluster applicationHomeSubCluster =
|
||||||
|
loadRouterAppStateFromAppNode(getNodePath(appRoot, childNodeName));
|
||||||
|
applicationHomeSubClusters.add(applicationHomeSubCluster);
|
||||||
|
} else {
|
||||||
|
// If AppId Node is partitioned.
|
||||||
|
String parentNodePath = getNodePath(appRoot, childNodeName);
|
||||||
|
List<String> leafNodes = getChildren(parentNodePath);
|
||||||
|
for (String leafNodeName : leafNodes) {
|
||||||
|
ApplicationHomeSubCluster applicationHomeSubCluster =
|
||||||
|
loadRouterAppStateFromAppNode(getNodePath(parentNodePath, leafNodeName));
|
||||||
|
applicationHomeSubClusters.add(applicationHomeSubCluster);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (!childNodeName.equals(ROUTER_APP_ROOT_HIERARCHIES)){
|
||||||
|
LOG.debug("Unknown child node with name {} under {}.", childNodeName, appRoot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (splitIndex != appIdNodeSplitIndex && !appNodeFound) {
|
||||||
|
// If no loaded app exists for a particular split index and the split
|
||||||
|
// index for which apps are being loaded is not the one configured, then
|
||||||
|
// we do not need to keep track of this hierarchy for storing/updating/
|
||||||
|
// removing app/app attempt znodes.
|
||||||
|
routerAppRootHierarchies.remove(splitIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return applicationHomeSubClusters;
|
||||||
|
}
|
||||||
}
|
}
|
@ -0,0 +1,22 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||||
|
<!--
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. See accompanying LICENSE file.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>yarn.resourcemanager.zk-appid-node.split-index</name>
|
||||||
|
<value>1</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
Loading…
x
Reference in New Issue
Block a user