YARN-11376. [Federation] Support updateNodeResource、refreshNodesResources API's for Federation. (#5496)
This commit is contained in:
parent
762d3ddb43
commit
926993cb73
@ -36,4 +36,31 @@ public static RefreshNodesResourcesRequest newInstance() {
|
|||||||
Records.newRecord(RefreshNodesResourcesRequest.class);
|
Records.newRecord(RefreshNodesResourcesRequest.class);
|
||||||
return request;
|
return request;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Public
|
||||||
|
@Evolving
|
||||||
|
public static RefreshNodesResourcesRequest newInstance(String subClusterId) {
|
||||||
|
RefreshNodesResourcesRequest request =
|
||||||
|
Records.newRecord(RefreshNodesResourcesRequest.class);
|
||||||
|
request.setSubClusterId(subClusterId);
|
||||||
|
return request;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the subClusterId.
|
||||||
|
*
|
||||||
|
* @return subClusterId.
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Evolving
|
||||||
|
public abstract String getSubClusterId();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the subClusterId.
|
||||||
|
*
|
||||||
|
* @param subClusterId subCluster Id.
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Evolving
|
||||||
|
public abstract void setSubClusterId(String subClusterId);
|
||||||
}
|
}
|
||||||
|
@ -51,7 +51,18 @@ public static UpdateNodeResourceRequest newInstance(
|
|||||||
request.setNodeResourceMap(nodeResourceMap);
|
request.setNodeResourceMap(nodeResourceMap);
|
||||||
return request;
|
return request;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Public
|
||||||
|
@Evolving
|
||||||
|
public static UpdateNodeResourceRequest newInstance(
|
||||||
|
Map<NodeId, ResourceOption> nodeResourceMap, String subClusterId) {
|
||||||
|
UpdateNodeResourceRequest request =
|
||||||
|
Records.newRecord(UpdateNodeResourceRequest.class);
|
||||||
|
request.setNodeResourceMap(nodeResourceMap);
|
||||||
|
request.setSubClusterId(subClusterId);
|
||||||
|
return request;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the map from <code>NodeId</code> to <code>ResourceOption</code>.
|
* Get the map from <code>NodeId</code> to <code>ResourceOption</code>.
|
||||||
* @return the map of {@code <NodeId, ResourceOption>}
|
* @return the map of {@code <NodeId, ResourceOption>}
|
||||||
@ -68,4 +79,21 @@ public static UpdateNodeResourceRequest newInstance(
|
|||||||
@Evolving
|
@Evolving
|
||||||
public abstract void setNodeResourceMap(Map<NodeId, ResourceOption> nodeResourceMap);
|
public abstract void setNodeResourceMap(Map<NodeId, ResourceOption> nodeResourceMap);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the subClusterId.
|
||||||
|
*
|
||||||
|
* @return subClusterId.
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Evolving
|
||||||
|
public abstract String getSubClusterId();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the subClusterId.
|
||||||
|
*
|
||||||
|
* @param subClusterId subCluster Id.
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Evolving
|
||||||
|
public abstract void setSubClusterId(String subClusterId);
|
||||||
}
|
}
|
||||||
|
@ -79,12 +79,14 @@ message GetGroupsForUserResponseProto {
|
|||||||
|
|
||||||
message UpdateNodeResourceRequestProto {
|
message UpdateNodeResourceRequestProto {
|
||||||
repeated NodeResourceMapProto node_resource_map = 1;
|
repeated NodeResourceMapProto node_resource_map = 1;
|
||||||
|
optional string sub_cluster_id = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
message UpdateNodeResourceResponseProto {
|
message UpdateNodeResourceResponseProto {
|
||||||
}
|
}
|
||||||
|
|
||||||
message RefreshNodesResourcesRequestProto {
|
message RefreshNodesResourcesRequestProto {
|
||||||
|
optional string sub_cluster_id = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
message RefreshNodesResourcesResponseProto {
|
message RefreshNodesResourcesResponseProto {
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||||
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshNodesResourcesRequestProto;
|
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshNodesResourcesRequestProto;
|
||||||
|
import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshNodesResourcesRequestProtoOrBuilder;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesRequest;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesRequest;
|
||||||
|
|
||||||
import org.apache.hadoop.thirdparty.protobuf.TextFormat;
|
import org.apache.hadoop.thirdparty.protobuf.TextFormat;
|
||||||
@ -69,4 +70,27 @@ public boolean equals(Object other) {
|
|||||||
public String toString() {
|
public String toString() {
|
||||||
return TextFormat.shortDebugString(getProto());
|
return TextFormat.shortDebugString(getProto());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void maybeInitBuilder() {
|
||||||
|
if (viaProto || builder == null) {
|
||||||
|
builder = RefreshNodesResourcesRequestProto.newBuilder(proto);
|
||||||
|
}
|
||||||
|
viaProto = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getSubClusterId() {
|
||||||
|
RefreshNodesResourcesRequestProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
|
return (p.hasSubClusterId()) ? p.getSubClusterId() : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setSubClusterId(String subClusterId) {
|
||||||
|
maybeInitBuilder();
|
||||||
|
if (subClusterId == null) {
|
||||||
|
builder.clearSubClusterId();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
builder.setSubClusterId(subClusterId);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -66,6 +66,22 @@ public void setNodeResourceMap(Map<NodeId, ResourceOption> nodeResourceMap) {
|
|||||||
this.nodeResourceMap.putAll(nodeResourceMap);
|
this.nodeResourceMap.putAll(nodeResourceMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getSubClusterId() {
|
||||||
|
UpdateNodeResourceRequestProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
|
return (p.hasSubClusterId()) ? p.getSubClusterId() : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setSubClusterId(String subClusterId) {
|
||||||
|
maybeInitBuilder();
|
||||||
|
if (subClusterId == null) {
|
||||||
|
builder.clearSubClusterId();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
builder.setSubClusterId(subClusterId);
|
||||||
|
}
|
||||||
|
|
||||||
public UpdateNodeResourceRequestProto getProto() {
|
public UpdateNodeResourceRequestProto getProto() {
|
||||||
mergeLocalToProto();
|
mergeLocalToProto();
|
||||||
proto = viaProto ? proto : builder.build();
|
proto = viaProto ? proto : builder.build();
|
||||||
|
@ -163,6 +163,10 @@ public final class RouterMetrics {
|
|||||||
private MutableGaugeInt numGetClusterInfoFailedRetrieved;
|
private MutableGaugeInt numGetClusterInfoFailedRetrieved;
|
||||||
@Metric("# of getClusterUserInfo failed to be retrieved")
|
@Metric("# of getClusterUserInfo failed to be retrieved")
|
||||||
private MutableGaugeInt numGetClusterUserInfoFailedRetrieved;
|
private MutableGaugeInt numGetClusterUserInfoFailedRetrieved;
|
||||||
|
@Metric("# of updateNodeResource failed to be retrieved")
|
||||||
|
private MutableGaugeInt numUpdateNodeResourceFailedRetrieved;
|
||||||
|
@Metric("# of refreshNodesResources failed to be retrieved")
|
||||||
|
private MutableGaugeInt numRefreshNodesResourcesFailedRetrieved;
|
||||||
|
|
||||||
// Aggregate metrics are shared, and don't have to be looked up per call
|
// Aggregate metrics are shared, and don't have to be looked up per call
|
||||||
@Metric("Total number of successful Submitted apps and latency(ms)")
|
@Metric("Total number of successful Submitted apps and latency(ms)")
|
||||||
@ -287,6 +291,10 @@ public final class RouterMetrics {
|
|||||||
private MutableRate totalSucceededGetClusterInfoRetrieved;
|
private MutableRate totalSucceededGetClusterInfoRetrieved;
|
||||||
@Metric("Total number of successful Retrieved GetClusterUserInfoRetrieved and latency(ms)")
|
@Metric("Total number of successful Retrieved GetClusterUserInfoRetrieved and latency(ms)")
|
||||||
private MutableRate totalSucceededGetClusterUserInfoRetrieved;
|
private MutableRate totalSucceededGetClusterUserInfoRetrieved;
|
||||||
|
@Metric("Total number of successful Retrieved UpdateNodeResource and latency(ms)")
|
||||||
|
private MutableRate totalSucceededUpdateNodeResourceRetrieved;
|
||||||
|
@Metric("Total number of successful Retrieved RefreshNodesResources and latency(ms)")
|
||||||
|
private MutableRate totalSucceededRefreshNodesResourcesRetrieved;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provide quantile counters for all latencies.
|
* Provide quantile counters for all latencies.
|
||||||
@ -352,6 +360,8 @@ public final class RouterMetrics {
|
|||||||
private MutableQuantiles removeFromClusterNodeLabelsLatency;
|
private MutableQuantiles removeFromClusterNodeLabelsLatency;
|
||||||
private MutableQuantiles getClusterInfoLatency;
|
private MutableQuantiles getClusterInfoLatency;
|
||||||
private MutableQuantiles getClusterUserInfoLatency;
|
private MutableQuantiles getClusterUserInfoLatency;
|
||||||
|
private MutableQuantiles updateNodeResourceLatency;
|
||||||
|
private MutableQuantiles refreshNodesResourcesLatency;
|
||||||
|
|
||||||
private static volatile RouterMetrics instance = null;
|
private static volatile RouterMetrics instance = null;
|
||||||
private static MetricsRegistry registry;
|
private static MetricsRegistry registry;
|
||||||
@ -567,6 +577,12 @@ private RouterMetrics() {
|
|||||||
|
|
||||||
getClusterUserInfoLatency = registry.newQuantiles("getClusterUserInfoLatency",
|
getClusterUserInfoLatency = registry.newQuantiles("getClusterUserInfoLatency",
|
||||||
"latency of get cluster user info timeouts", "ops", "latency", 10);
|
"latency of get cluster user info timeouts", "ops", "latency", 10);
|
||||||
|
|
||||||
|
updateNodeResourceLatency = registry.newQuantiles("updateNodeResourceLatency",
|
||||||
|
"latency of update node resource timeouts", "ops", "latency", 10);
|
||||||
|
|
||||||
|
refreshNodesResourcesLatency = registry.newQuantiles("refreshNodesResourcesLatency",
|
||||||
|
"latency of refresh nodes resources timeouts", "ops", "latency", 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static RouterMetrics getMetrics() {
|
public static RouterMetrics getMetrics() {
|
||||||
@ -873,6 +889,16 @@ public long getNumSucceededGetClusterUserInfoRetrieved() {
|
|||||||
return totalSucceededGetClusterUserInfoRetrieved.lastStat().numSamples();
|
return totalSucceededGetClusterUserInfoRetrieved.lastStat().numSamples();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public long getNumSucceededUpdateNodeResourceRetrieved() {
|
||||||
|
return totalSucceededUpdateNodeResourceRetrieved.lastStat().numSamples();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public long getNumSucceededRefreshNodesResourcesRetrieved() {
|
||||||
|
return totalSucceededRefreshNodesResourcesRetrieved.lastStat().numSamples();
|
||||||
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public long getNumSucceededRefreshSuperUserGroupsConfigurationRetrieved() {
|
public long getNumSucceededRefreshSuperUserGroupsConfigurationRetrieved() {
|
||||||
return totalSucceededRefreshSuperUserGroupsConfigurationRetrieved.lastStat().numSamples();
|
return totalSucceededRefreshSuperUserGroupsConfigurationRetrieved.lastStat().numSamples();
|
||||||
@ -1173,6 +1199,16 @@ public double getLatencySucceededGetClusterUserInfoRetrieved() {
|
|||||||
return totalSucceededGetClusterUserInfoRetrieved.lastStat().mean();
|
return totalSucceededGetClusterUserInfoRetrieved.lastStat().mean();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public double getLatencySucceededUpdateNodeResourceRetrieved() {
|
||||||
|
return totalSucceededUpdateNodeResourceRetrieved.lastStat().mean();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public double getLatencySucceededRefreshNodesResourcesRetrieved() {
|
||||||
|
return totalSucceededRefreshNodesResourcesRetrieved.lastStat().mean();
|
||||||
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public double getLatencySucceededRefreshSuperUserGroupsConfigurationRetrieved() {
|
public double getLatencySucceededRefreshSuperUserGroupsConfigurationRetrieved() {
|
||||||
return totalSucceededRefreshSuperUserGroupsConfigurationRetrieved.lastStat().mean();
|
return totalSucceededRefreshSuperUserGroupsConfigurationRetrieved.lastStat().mean();
|
||||||
@ -1426,6 +1462,14 @@ public int getClusterUserInfoFailedRetrieved() {
|
|||||||
return numGetClusterUserInfoFailedRetrieved.value();
|
return numGetClusterUserInfoFailedRetrieved.value();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getUpdateNodeResourceFailedRetrieved() {
|
||||||
|
return numUpdateNodeResourceFailedRetrieved.value();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getRefreshNodesResourcesFailedRetrieved() {
|
||||||
|
return numRefreshNodesResourcesFailedRetrieved.value();
|
||||||
|
}
|
||||||
|
|
||||||
public int getDelegationTokenFailedRetrieved() {
|
public int getDelegationTokenFailedRetrieved() {
|
||||||
return numGetDelegationTokenFailedRetrieved.value();
|
return numGetDelegationTokenFailedRetrieved.value();
|
||||||
}
|
}
|
||||||
@ -1739,6 +1783,16 @@ public void succeededGetClusterUserInfoRetrieved(long duration) {
|
|||||||
getClusterUserInfoLatency.add(duration);
|
getClusterUserInfoLatency.add(duration);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void succeededUpdateNodeResourceRetrieved(long duration) {
|
||||||
|
totalSucceededUpdateNodeResourceRetrieved.add(duration);
|
||||||
|
updateNodeResourceLatency.add(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void succeededRefreshNodesResourcesRetrieved(long duration) {
|
||||||
|
totalSucceededRefreshNodesResourcesRetrieved.add(duration);
|
||||||
|
refreshNodesResourcesLatency.add(duration);
|
||||||
|
}
|
||||||
|
|
||||||
public void succeededRefreshSuperUserGroupsConfRetrieved(long duration) {
|
public void succeededRefreshSuperUserGroupsConfRetrieved(long duration) {
|
||||||
totalSucceededRefreshSuperUserGroupsConfigurationRetrieved.add(duration);
|
totalSucceededRefreshSuperUserGroupsConfigurationRetrieved.add(duration);
|
||||||
refreshSuperUserGroupsConfLatency.add(duration);
|
refreshSuperUserGroupsConfLatency.add(duration);
|
||||||
@ -1967,6 +2021,14 @@ public void incrGetClusterUserInfoFailedRetrieved() {
|
|||||||
numGetClusterUserInfoFailedRetrieved.incr();
|
numGetClusterUserInfoFailedRetrieved.incr();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void incrUpdateNodeResourceFailedRetrieved() {
|
||||||
|
numUpdateNodeResourceFailedRetrieved.incr();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrRefreshNodesResourcesFailedRetrieved() {
|
||||||
|
numRefreshNodesResourcesFailedRetrieved.incr();
|
||||||
|
}
|
||||||
|
|
||||||
public void incrGetDelegationTokenFailedRetrieved() {
|
public void incrGetDelegationTokenFailedRetrieved() {
|
||||||
numGetDelegationTokenFailedRetrieved.incr();
|
numGetDelegationTokenFailedRetrieved.incr();
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
import org.apache.commons.lang3.NotImplementedException;
|
import org.apache.commons.lang3.NotImplementedException;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.apache.hadoop.classification.VisibleForTesting;
|
import org.apache.hadoop.classification.VisibleForTesting;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||||
@ -112,7 +113,7 @@ public void init(String userName) {
|
|||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
protected ResourceManagerAdministrationProtocol getAdminRMProxyForSubCluster(
|
protected ResourceManagerAdministrationProtocol getAdminRMProxyForSubCluster(
|
||||||
SubClusterId subClusterId) throws YarnException {
|
SubClusterId subClusterId) throws Exception {
|
||||||
|
|
||||||
if (adminRMProxies.containsKey(subClusterId)) {
|
if (adminRMProxies.containsKey(subClusterId)) {
|
||||||
return adminRMProxies.get(subClusterId);
|
return adminRMProxies.get(subClusterId);
|
||||||
@ -438,13 +439,75 @@ public RefreshServiceAclsResponse refreshServiceAcls(RefreshServiceAclsRequest r
|
|||||||
@Override
|
@Override
|
||||||
public UpdateNodeResourceResponse updateNodeResource(UpdateNodeResourceRequest request)
|
public UpdateNodeResourceResponse updateNodeResource(UpdateNodeResourceRequest request)
|
||||||
throws YarnException, IOException {
|
throws YarnException, IOException {
|
||||||
throw new NotImplementedException();
|
|
||||||
|
// parameter verification.
|
||||||
|
if (request == null) {
|
||||||
|
routerMetrics.incrUpdateNodeResourceFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowException("Missing UpdateNodeResource request.", null);
|
||||||
|
}
|
||||||
|
|
||||||
|
String subClusterId = request.getSubClusterId();
|
||||||
|
if (StringUtils.isBlank(subClusterId)) {
|
||||||
|
routerMetrics.incrUpdateNodeResourceFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowException("Missing UpdateNodeResource SubClusterId.", null);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
|
RMAdminProtocolMethod remoteMethod = new RMAdminProtocolMethod(
|
||||||
|
new Class[]{UpdateNodeResourceRequest.class}, new Object[]{request});
|
||||||
|
Collection<UpdateNodeResourceResponse> updateNodeResourceResps =
|
||||||
|
remoteMethod.invokeConcurrent(this, UpdateNodeResourceResponse.class, subClusterId);
|
||||||
|
if (CollectionUtils.isNotEmpty(updateNodeResourceResps)) {
|
||||||
|
long stopTime = clock.getTime();
|
||||||
|
routerMetrics.succeededUpdateNodeResourceRetrieved(stopTime - startTime);
|
||||||
|
return UpdateNodeResourceResponse.newInstance();
|
||||||
|
}
|
||||||
|
} catch (YarnException e) {
|
||||||
|
routerMetrics.incrUpdateNodeResourceFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowException(e,
|
||||||
|
"Unable to updateNodeResource due to exception. " + e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
routerMetrics.incrUpdateNodeResourceFailedRetrieved();
|
||||||
|
throw new YarnException("Unable to updateNodeResource.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public RefreshNodesResourcesResponse refreshNodesResources(RefreshNodesResourcesRequest request)
|
public RefreshNodesResourcesResponse refreshNodesResources(RefreshNodesResourcesRequest request)
|
||||||
throws YarnException, IOException {
|
throws YarnException, IOException {
|
||||||
throw new NotImplementedException();
|
|
||||||
|
// parameter verification.
|
||||||
|
if (request == null) {
|
||||||
|
routerMetrics.incrRefreshNodesResourcesFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowException("Missing RefreshNodesResources request.", null);
|
||||||
|
}
|
||||||
|
|
||||||
|
String subClusterId = request.getSubClusterId();
|
||||||
|
if (StringUtils.isBlank(subClusterId)) {
|
||||||
|
routerMetrics.incrRefreshNodesResourcesFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowException("Missing RefreshNodesResources SubClusterId.", null);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
long startTime = clock.getTime();
|
||||||
|
RMAdminProtocolMethod remoteMethod = new RMAdminProtocolMethod(
|
||||||
|
new Class[]{RefreshNodesResourcesRequest.class}, new Object[]{request});
|
||||||
|
Collection<RefreshNodesResourcesResponse> refreshNodesResourcesResps =
|
||||||
|
remoteMethod.invokeConcurrent(this, RefreshNodesResourcesResponse.class, subClusterId);
|
||||||
|
if (CollectionUtils.isNotEmpty(refreshNodesResourcesResps)) {
|
||||||
|
long stopTime = clock.getTime();
|
||||||
|
routerMetrics.succeededRefreshNodesResourcesRetrieved(stopTime - startTime);
|
||||||
|
return RefreshNodesResourcesResponse.newInstance();
|
||||||
|
}
|
||||||
|
} catch (YarnException e) {
|
||||||
|
routerMetrics.incrRefreshNodesResourcesFailedRetrieved();
|
||||||
|
RouterServerUtil.logAndThrowException(e,
|
||||||
|
"Unable to refreshNodesResources due to exception. " + e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
routerMetrics.incrRefreshNodesResourcesFailedRetrieved();
|
||||||
|
throw new YarnException("Unable to refreshNodesResources.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -578,6 +578,16 @@ public void getClusterUserInfoFailed() {
|
|||||||
LOG.info("Mocked: failed getClusterUserInfo call");
|
LOG.info("Mocked: failed getClusterUserInfo call");
|
||||||
metrics.incrGetClusterUserInfoFailedRetrieved();
|
metrics.incrGetClusterUserInfoFailedRetrieved();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void getUpdateNodeResourceFailed() {
|
||||||
|
LOG.info("Mocked: failed getClusterUserInfo call");
|
||||||
|
metrics.incrUpdateNodeResourceFailedRetrieved();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getRefreshNodesResourcesFailed() {
|
||||||
|
LOG.info("Mocked: failed refreshNodesResources call");
|
||||||
|
metrics.incrRefreshNodesResourcesFailedRetrieved();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Records successes for all calls
|
// Records successes for all calls
|
||||||
@ -858,6 +868,16 @@ public void getClusterUserInfoRetrieved(long duration) {
|
|||||||
LOG.info("Mocked: successful GetClusterUserInfoRetrieved call with duration {}", duration);
|
LOG.info("Mocked: successful GetClusterUserInfoRetrieved call with duration {}", duration);
|
||||||
metrics.succeededGetClusterUserInfoRetrieved(duration);
|
metrics.succeededGetClusterUserInfoRetrieved(duration);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void getUpdateNodeResourceRetrieved(long duration) {
|
||||||
|
LOG.info("Mocked: successful UpdateNodeResourceRetrieved call with duration {}", duration);
|
||||||
|
metrics.succeededUpdateNodeResourceRetrieved(duration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void getRefreshNodesResourcesRetrieved(long duration) {
|
||||||
|
LOG.info("Mocked: successful RefreshNodesResourcesRetrieved call with duration {}", duration);
|
||||||
|
metrics.succeededRefreshNodesResourcesRetrieved(duration);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -1912,4 +1932,48 @@ public void testGetClusterUserInfoRetrieved() {
|
|||||||
Assert.assertEquals(225,
|
Assert.assertEquals(225,
|
||||||
metrics.getLatencySucceededGetClusterUserInfoRetrieved(), ASSERT_DOUBLE_DELTA);
|
metrics.getLatencySucceededGetClusterUserInfoRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpdateNodeResourceRetrievedFailed() {
|
||||||
|
long totalBadBefore = metrics.getUpdateNodeResourceFailedRetrieved();
|
||||||
|
badSubCluster.getUpdateNodeResourceFailed();
|
||||||
|
Assert.assertEquals(totalBadBefore + 1, metrics.getUpdateNodeResourceFailedRetrieved());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpdateNodeResourceRetrieved() {
|
||||||
|
long totalGoodBefore = metrics.getNumSucceededGetClusterUserInfoRetrieved();
|
||||||
|
goodSubCluster.getUpdateNodeResourceRetrieved(150);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 1,
|
||||||
|
metrics.getNumSucceededUpdateNodeResourceRetrieved());
|
||||||
|
Assert.assertEquals(150,
|
||||||
|
metrics.getLatencySucceededUpdateNodeResourceRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
goodSubCluster.getUpdateNodeResourceRetrieved(300);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 2,
|
||||||
|
metrics.getNumSucceededUpdateNodeResourceRetrieved());
|
||||||
|
Assert.assertEquals(225,
|
||||||
|
metrics.getLatencySucceededUpdateNodeResourceRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRefreshNodesResourcesRetrievedFailed() {
|
||||||
|
long totalBadBefore = metrics.getRefreshNodesResourcesFailedRetrieved();
|
||||||
|
badSubCluster.getRefreshNodesResourcesFailed();
|
||||||
|
Assert.assertEquals(totalBadBefore + 1, metrics.getRefreshNodesResourcesFailedRetrieved());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRefreshNodesResourcesRetrieved() {
|
||||||
|
long totalGoodBefore = metrics.getNumSucceededRefreshNodesResourcesRetrieved();
|
||||||
|
goodSubCluster.getRefreshNodesResourcesRetrieved(150);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 1,
|
||||||
|
metrics.getNumSucceededRefreshNodesResourcesRetrieved());
|
||||||
|
Assert.assertEquals(150,
|
||||||
|
metrics.getLatencySucceededRefreshNodesResourcesRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
goodSubCluster.getRefreshNodesResourcesRetrieved(300);
|
||||||
|
Assert.assertEquals(totalGoodBefore + 2,
|
||||||
|
metrics.getNumSucceededRefreshNodesResourcesRetrieved());
|
||||||
|
Assert.assertEquals(225,
|
||||||
|
metrics.getLatencySucceededRefreshNodesResourcesRetrieved(), ASSERT_DOUBLE_DELTA);
|
||||||
|
}
|
||||||
}
|
}
|
@ -22,6 +22,9 @@
|
|||||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
import org.apache.hadoop.test.LambdaTestUtils;
|
import org.apache.hadoop.test.LambdaTestUtils;
|
||||||
import org.apache.hadoop.yarn.api.records.DecommissionType;
|
import org.apache.hadoop.yarn.api.records.DecommissionType;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesRequest;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesRequest;
|
||||||
@ -35,6 +38,10 @@
|
|||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsResponse;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsResponse;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsRequest;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsRequest;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsResponse;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsResponse;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceResponse;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesRequest;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshNodesResourcesResponse;
|
||||||
import org.apache.hadoop.yarn.server.federation.store.impl.MemoryFederationStateStore;
|
import org.apache.hadoop.yarn.server.federation.store.impl.MemoryFederationStateStore;
|
||||||
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
|
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
|
||||||
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
|
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
|
||||||
@ -45,7 +52,9 @@
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
|
|
||||||
@ -63,6 +72,7 @@ public class TestFederationRMAdminInterceptor extends BaseRouterRMAdminTest {
|
|||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
private final static String USER_NAME = "test-user";
|
private final static String USER_NAME = "test-user";
|
||||||
private final static int NUM_SUBCLUSTER = 4;
|
private final static int NUM_SUBCLUSTER = 4;
|
||||||
|
private final static int GB = 1024;
|
||||||
|
|
||||||
private TestableFederationRMAdminInterceptor interceptor;
|
private TestableFederationRMAdminInterceptor interceptor;
|
||||||
private FederationStateStoreFacade facade;
|
private FederationStateStoreFacade facade;
|
||||||
@ -320,4 +330,62 @@ public void testSC1RefreshServiceAcls() throws Exception {
|
|||||||
LambdaTestUtils.intercept(Exception.class, "subClusterId = SC-NON is not an active subCluster.",
|
LambdaTestUtils.intercept(Exception.class, "subClusterId = SC-NON is not an active subCluster.",
|
||||||
() -> interceptor.refreshServiceAcls(request1));
|
() -> interceptor.refreshServiceAcls(request1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpdateNodeResourceEmptyRequest() throws Exception {
|
||||||
|
// null request1.
|
||||||
|
LambdaTestUtils.intercept(YarnException.class, "Missing UpdateNodeResource request.",
|
||||||
|
() -> interceptor.updateNodeResource(null));
|
||||||
|
|
||||||
|
// null request2.
|
||||||
|
Map<NodeId, ResourceOption> nodeResourceMap = new HashMap<>();
|
||||||
|
UpdateNodeResourceRequest request = UpdateNodeResourceRequest.newInstance(nodeResourceMap);
|
||||||
|
LambdaTestUtils.intercept(YarnException.class, "Missing UpdateNodeResource SubClusterId.",
|
||||||
|
() -> interceptor.updateNodeResource(request));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUpdateNodeResourceNormalRequest() throws Exception {
|
||||||
|
// case 1, test the existing subCluster (SC-1).
|
||||||
|
Map<NodeId, ResourceOption> nodeResourceMap = new HashMap<>();
|
||||||
|
NodeId nodeId = NodeId.newInstance("127.0.0.1", 1);
|
||||||
|
ResourceOption resourceOption =
|
||||||
|
ResourceOption.newInstance(Resource.newInstance(2 * GB, 1), -1);
|
||||||
|
nodeResourceMap.put(nodeId, resourceOption);
|
||||||
|
UpdateNodeResourceRequest request =
|
||||||
|
UpdateNodeResourceRequest.newInstance(nodeResourceMap, "SC-1");
|
||||||
|
UpdateNodeResourceResponse response = interceptor.updateNodeResource(request);
|
||||||
|
assertNotNull(response);
|
||||||
|
|
||||||
|
// case 2, test the non-exist subCluster.
|
||||||
|
UpdateNodeResourceRequest request1 =
|
||||||
|
UpdateNodeResourceRequest.newInstance(nodeResourceMap, "SC-NON");
|
||||||
|
LambdaTestUtils.intercept(Exception.class, "subClusterId = SC-NON is not an active subCluster.",
|
||||||
|
() -> interceptor.updateNodeResource(request1));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRefreshNodesResourcesEmptyRequest() throws Exception {
|
||||||
|
// null request1.
|
||||||
|
LambdaTestUtils.intercept(YarnException.class, "Missing RefreshNodesResources request.",
|
||||||
|
() -> interceptor.refreshNodesResources(null));
|
||||||
|
|
||||||
|
// null request2.
|
||||||
|
RefreshNodesResourcesRequest request = RefreshNodesResourcesRequest.newInstance();
|
||||||
|
LambdaTestUtils.intercept(YarnException.class, "Missing RefreshNodesResources SubClusterId.",
|
||||||
|
() -> interceptor.refreshNodesResources(request));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRefreshNodesResourcesNormalRequest() throws Exception {
|
||||||
|
// case 1, test the existing subCluster (SC-1).
|
||||||
|
RefreshNodesResourcesRequest request = RefreshNodesResourcesRequest.newInstance("SC-1");
|
||||||
|
RefreshNodesResourcesResponse response = interceptor.refreshNodesResources(request);
|
||||||
|
assertNotNull(response);
|
||||||
|
|
||||||
|
// case 2, test the non-exist subCluster.
|
||||||
|
RefreshNodesResourcesRequest request1 = RefreshNodesResourcesRequest.newInstance("SC-NON");
|
||||||
|
LambdaTestUtils.intercept(Exception.class, "subClusterId = SC-NON is not an active subCluster.",
|
||||||
|
() -> interceptor.refreshNodesResources(request1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -52,7 +52,7 @@ public class TestableFederationRMAdminInterceptor extends FederationRMAdminInter
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected ResourceManagerAdministrationProtocol getAdminRMProxyForSubCluster(
|
protected ResourceManagerAdministrationProtocol getAdminRMProxyForSubCluster(
|
||||||
SubClusterId subClusterId) throws YarnException {
|
SubClusterId subClusterId) throws Exception {
|
||||||
MockRM mockRM;
|
MockRM mockRM;
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
if (mockRMs.containsKey(subClusterId)) {
|
if (mockRMs.containsKey(subClusterId)) {
|
||||||
@ -66,6 +66,7 @@ protected ResourceManagerAdministrationProtocol getAdminRMProxyForSubCluster(
|
|||||||
}
|
}
|
||||||
mockRM.init(config);
|
mockRM.init(config);
|
||||||
mockRM.start();
|
mockRM.start();
|
||||||
|
mockRM.registerNode("127.0.0.1:1", 102400, 100);
|
||||||
mockRMs.put(subClusterId, mockRM);
|
mockRMs.put(subClusterId, mockRM);
|
||||||
}
|
}
|
||||||
return mockRM.getAdminService();
|
return mockRM.getAdminService();
|
||||||
|
Loading…
Reference in New Issue
Block a user