YARN-11370. [Federation] Refactor MemoryFederationStateStore code. (#5126)

This commit is contained in:
slfan1989 2023-02-23 04:37:35 +08:00 committed by GitHub
parent e8a6b2c2c4
commit 2e997d818d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 66 additions and 65 deletions

View File

@ -41,6 +41,7 @@
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.federation.store.FederationStateStore;
import org.apache.hadoop.yarn.server.federation.store.metrics.FederationStateStoreClientMetrics;
import org.apache.hadoop.yarn.server.federation.store.records.AddApplicationHomeSubClusterRequest;
import org.apache.hadoop.yarn.server.federation.store.records.AddApplicationHomeSubClusterResponse;
import org.apache.hadoop.yarn.server.federation.store.records.ApplicationHomeSubCluster;
@ -107,7 +108,7 @@
public class MemoryFederationStateStore implements FederationStateStore {
private Map<SubClusterId, SubClusterInfo> membership;
private Map<ApplicationId, SubClusterId> applications;
private Map<ApplicationId, ApplicationHomeSubCluster> applications;
private Map<ReservationId, SubClusterId> reservations;
private Map<String, SubClusterPolicyConfiguration> policies;
private RouterRMDTSecretManagerState routerRMSecretManagerState;
@ -122,10 +123,10 @@ public class MemoryFederationStateStore implements FederationStateStore {
@Override
public void init(Configuration conf) {
membership = new ConcurrentHashMap<SubClusterId, SubClusterInfo>();
applications = new ConcurrentHashMap<ApplicationId, SubClusterId>();
reservations = new ConcurrentHashMap<ReservationId, SubClusterId>();
policies = new ConcurrentHashMap<String, SubClusterPolicyConfiguration>();
membership = new ConcurrentHashMap<>();
applications = new ConcurrentHashMap<>();
reservations = new ConcurrentHashMap<>();
policies = new ConcurrentHashMap<>();
routerRMSecretManagerState = new RouterRMDTSecretManagerState();
maxAppsInStateStore = conf.getInt(
YarnConfiguration.FEDERATION_STATESTORE_MAX_APPLICATIONS,
@ -143,14 +144,15 @@ public void close() {
}
@Override
public SubClusterRegisterResponse registerSubCluster(
SubClusterRegisterRequest request) throws YarnException {
public SubClusterRegisterResponse registerSubCluster(SubClusterRegisterRequest request)
throws YarnException {
long startTime = clock.getTime();
FederationMembershipStateStoreInputValidator.validate(request);
SubClusterInfo subClusterInfo = request.getSubClusterInfo();
long currentTime =
Calendar.getInstance(TimeZone.getTimeZone("UTC")).getTimeInMillis();
SubClusterInfo subClusterInfoToSave =
SubClusterInfo.newInstance(subClusterInfo.getSubClusterId(),
subClusterInfo.getAMRMServiceAddress(),
@ -161,18 +163,21 @@ public SubClusterRegisterResponse registerSubCluster(
subClusterInfo.getCapability());
membership.put(subClusterInfo.getSubClusterId(), subClusterInfoToSave);
long stopTime = clock.getTime();
FederationStateStoreClientMetrics.succeededStateStoreCall(stopTime - startTime);
return SubClusterRegisterResponse.newInstance();
}
@Override
public SubClusterDeregisterResponse deregisterSubCluster(
SubClusterDeregisterRequest request) throws YarnException {
public SubClusterDeregisterResponse deregisterSubCluster(SubClusterDeregisterRequest request)
throws YarnException {
FederationMembershipStateStoreInputValidator.validate(request);
SubClusterInfo subClusterInfo = membership.get(request.getSubClusterId());
if (subClusterInfo == null) {
String errMsg =
"SubCluster " + request.getSubClusterId().toString() + " not found";
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
FederationStateStoreUtils.logAndThrowStoreException(
LOG, "SubCluster %s not found", request.getSubClusterId());
} else {
subClusterInfo.setState(request.getState());
}
@ -181,17 +186,16 @@ public SubClusterDeregisterResponse deregisterSubCluster(
}
@Override
public SubClusterHeartbeatResponse subClusterHeartbeat(
SubClusterHeartbeatRequest request) throws YarnException {
public SubClusterHeartbeatResponse subClusterHeartbeat(SubClusterHeartbeatRequest request)
throws YarnException {
FederationMembershipStateStoreInputValidator.validate(request);
SubClusterId subClusterId = request.getSubClusterId();
SubClusterInfo subClusterInfo = membership.get(subClusterId);
if (subClusterInfo == null) {
String errMsg = "SubCluster " + subClusterId.toString()
+ " does not exist; cannot heartbeat";
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
FederationStateStoreUtils.logAndThrowStoreException(
LOG, "SubCluster %s does not exist; cannot heartbeat.", request.getSubClusterId());
}
long currentTime =
@ -205,11 +209,12 @@ public SubClusterHeartbeatResponse subClusterHeartbeat(
}
@Override
public GetSubClusterInfoResponse getSubCluster(
GetSubClusterInfoRequest request) throws YarnException {
public GetSubClusterInfoResponse getSubCluster(GetSubClusterInfoRequest request)
throws YarnException {
FederationMembershipStateStoreInputValidator.validate(request);
SubClusterId subClusterId = request.getSubClusterId();
if (!membership.containsKey(subClusterId)) {
LOG.warn("The queried SubCluster: {} does not exist.", subClusterId);
return null;
@ -219,16 +224,17 @@ public GetSubClusterInfoResponse getSubCluster(
}
@Override
public GetSubClustersInfoResponse getSubClusters(
GetSubClustersInfoRequest request) throws YarnException {
List<SubClusterInfo> result = new ArrayList<SubClusterInfo>();
public GetSubClustersInfoResponse getSubClusters(GetSubClustersInfoRequest request)
throws YarnException {
List<SubClusterInfo> result = new ArrayList<>();
for (SubClusterInfo info : membership.values()) {
if (!request.getFilterInactiveSubClusters()
|| info.getState().isActive()) {
if (!request.getFilterInactiveSubClusters() || info.getState().isActive()) {
result.add(info);
}
}
return GetSubClustersInfoResponse.newInstance(result);
}
@ -239,16 +245,16 @@ public AddApplicationHomeSubClusterResponse addApplicationHomeSubCluster(
AddApplicationHomeSubClusterRequest request) throws YarnException {
FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
ApplicationId appId =
request.getApplicationHomeSubCluster().getApplicationId();
ApplicationHomeSubCluster homeSubCluster = request.getApplicationHomeSubCluster();
ApplicationId appId = homeSubCluster.getApplicationId();
if (!applications.containsKey(appId)) {
applications.put(appId,
request.getApplicationHomeSubCluster().getHomeSubCluster());
applications.put(appId, homeSubCluster);
}
return AddApplicationHomeSubClusterResponse
.newInstance(applications.get(appId));
ApplicationHomeSubCluster respHomeSubCluster = applications.get(appId);
return AddApplicationHomeSubClusterResponse.newInstance(respHomeSubCluster.getHomeSubCluster());
}
@Override
@ -256,15 +262,16 @@ public UpdateApplicationHomeSubClusterResponse updateApplicationHomeSubCluster(
UpdateApplicationHomeSubClusterRequest request) throws YarnException {
FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
ApplicationId appId =
request.getApplicationHomeSubCluster().getApplicationId();
if (!applications.containsKey(appId)) {
String errMsg = "Application " + appId + " does not exist";
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
FederationStateStoreUtils.logAndThrowStoreException(LOG,
"Application %s does not exist.", appId);
}
applications.put(appId,
request.getApplicationHomeSubCluster().getHomeSubCluster());
applications.put(appId, request.getApplicationHomeSubCluster());
return UpdateApplicationHomeSubClusterResponse.newInstance();
}
@ -275,11 +282,12 @@ public GetApplicationHomeSubClusterResponse getApplicationHomeSubCluster(
FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
ApplicationId appId = request.getApplicationId();
if (!applications.containsKey(appId)) {
String errMsg = "Application " + appId + " does not exist";
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
FederationStateStoreUtils.logAndThrowStoreException(LOG,
"Application %s does not exist.", appId);
}
return GetApplicationHomeSubClusterResponse.newInstance(appId, applications.get(appId));
return GetApplicationHomeSubClusterResponse.newInstance(appId,
applications.get(appId).getHomeSubCluster());
}
@Override
@ -303,7 +311,7 @@ public GetApplicationsHomeSubClusterResponse getApplicationsHomeSubCluster(
}
private ApplicationHomeSubCluster generateAppHomeSC(ApplicationId applicationId) {
SubClusterId subClusterId = applications.get(applicationId);
SubClusterId subClusterId = applications.get(applicationId).getHomeSubCluster();
return ApplicationHomeSubCluster.newInstance(applicationId, subClusterId);
}
@ -314,8 +322,8 @@ public DeleteApplicationHomeSubClusterResponse deleteApplicationHomeSubCluster(
FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
ApplicationId appId = request.getApplicationId();
if (!applications.containsKey(appId)) {
String errMsg = "Application " + appId + " does not exist";
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
FederationStateStoreUtils.logAndThrowStoreException(LOG,
"Application %s does not exist.", appId);
}
applications.remove(appId);
@ -329,12 +337,11 @@ public GetSubClusterPolicyConfigurationResponse getPolicyConfiguration(
FederationPolicyStoreInputValidator.validate(request);
String queue = request.getQueue();
if (!policies.containsKey(queue)) {
LOG.warn("Policy for queue: {} does not exist.", queue);
LOG.warn("Policy for queue : {} does not exist.", queue);
return null;
}
return GetSubClusterPolicyConfigurationResponse
.newInstance(policies.get(queue));
return GetSubClusterPolicyConfigurationResponse.newInstance(policies.get(queue));
}
@Override
@ -350,8 +357,7 @@ public SetSubClusterPolicyConfigurationResponse setPolicyConfiguration(
@Override
public GetSubClusterPoliciesConfigurationsResponse getPoliciesConfigurations(
GetSubClusterPoliciesConfigurationsRequest request) throws YarnException {
ArrayList<SubClusterPolicyConfiguration> result =
new ArrayList<SubClusterPolicyConfiguration>();
ArrayList<SubClusterPolicyConfiguration> result = new ArrayList<>();
for (SubClusterPolicyConfiguration policy : policies.values()) {
result.add(policy);
}
@ -386,7 +392,8 @@ public GetReservationHomeSubClusterResponse getReservationHomeSubCluster(
FederationReservationHomeSubClusterStoreInputValidator.validate(request);
ReservationId reservationId = request.getReservationId();
if (!reservations.containsKey(reservationId)) {
throw new YarnException("Reservation " + reservationId + " does not exist");
FederationStateStoreUtils.logAndThrowStoreException(LOG,
"Reservation %s does not exist.", reservationId);
}
SubClusterId subClusterId = reservations.get(reservationId);
ReservationHomeSubCluster homeSubCluster =
@ -417,7 +424,8 @@ public UpdateReservationHomeSubClusterResponse updateReservationHomeSubCluster(
ReservationId reservationId = request.getReservationHomeSubCluster().getReservationId();
if (!reservations.containsKey(reservationId)) {
throw new YarnException("Reservation " + reservationId + " does not exist.");
FederationStateStoreUtils.logAndThrowStoreException(LOG,
"Reservation %s does not exist.", reservationId);
}
SubClusterId subClusterId = request.getReservationHomeSubCluster().getHomeSubCluster();
@ -431,7 +439,8 @@ public DeleteReservationHomeSubClusterResponse deleteReservationHomeSubCluster(
FederationReservationHomeSubClusterStoreInputValidator.validate(request);
ReservationId reservationId = request.getReservationId();
if (!reservations.containsKey(reservationId)) {
throw new YarnException("Reservation " + reservationId + " does not exist");
FederationStateStoreUtils.logAndThrowStoreException(LOG,
"Reservation %s does not exist.", reservationId);
}
reservations.remove(reservationId);
return DeleteReservationHomeSubClusterResponse.newInstance();
@ -446,9 +455,8 @@ public RouterMasterKeyResponse storeNewMasterKey(RouterMasterKeyRequest request)
Set<DelegationKey> rmDTMasterKeyState = routerRMSecretManagerState.getMasterKeyState();
if (rmDTMasterKeyState.contains(delegationKey)) {
LOG.info("Error storing info for RMDTMasterKey with keyID: {}.", delegationKey.getKeyId());
throw new IOException("RMDTMasterKey with keyID: " + delegationKey.getKeyId() +
" is already stored");
FederationStateStoreUtils.logAndThrowStoreException(LOG,
"Error storing info for RMDTMasterKey with keyID: %s.", delegationKey.getKeyId());
}
routerRMSecretManagerState.getMasterKeyState().add(delegationKey);

View File

@ -164,12 +164,9 @@ public void testDeregisterSubClusterUnknownSubCluster() throws Exception {
SubClusterDeregisterRequest deregisterRequest = SubClusterDeregisterRequest
.newInstance(subClusterId, SubClusterState.SC_UNREGISTERED);
try {
stateStore.deregisterSubCluster(deregisterRequest);
Assert.fail();
} catch (FederationStateStoreException e) {
Assert.assertTrue(e.getMessage().startsWith("SubCluster SC not found"));
}
LambdaTestUtils.intercept(YarnException.class,
"SubCluster SC not found", () -> stateStore.deregisterSubCluster(deregisterRequest));
}
@Test
@ -266,13 +263,9 @@ public void testSubClusterHeartbeatUnknownSubCluster() throws Exception {
SubClusterHeartbeatRequest heartbeatRequest = SubClusterHeartbeatRequest
.newInstance(subClusterId, SubClusterState.SC_RUNNING, "capability");
try {
stateStore.subClusterHeartbeat(heartbeatRequest);
Assert.fail();
} catch (FederationStateStoreException e) {
Assert.assertTrue(e.getMessage()
.startsWith("SubCluster SC does not exist; cannot heartbeat"));
}
LambdaTestUtils.intercept(YarnException.class,
"SubCluster SC does not exist; cannot heartbeat",
() -> stateStore.subClusterHeartbeat(heartbeatRequest));
}
// Test FederationApplicationHomeSubClusterStore