YARN-8699. Add Yarnclient#yarnclusterMetrics API implementation in router. Contributed by Bibin A Chundatt.
commit 3dc2988a37
parent ab90248b30
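For context, once this change is in place a plain YarnClient pointed at the Router's client RM service gets aggregated cluster metrics back instead of a NotImplementedException from the federation interceptor. A minimal, illustrative client-side sketch (the class name is made up, and it assumes yarn.resourcemanager.address in the loaded configuration resolves to the Router's client RM address):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class ClusterMetricsThroughRouter {
  public static void main(String[] args) throws Exception {
    // Assumption: yarn.resourcemanager.address points at the Router's client
    // RM service, so this call is served by FederationClientInterceptor.
    Configuration conf = new YarnConfiguration();
    YarnClient client = YarnClient.createYarnClient();
    client.init(conf);
    client.start();
    try {
      // Fans out to every active subcluster and returns the merged counters.
      YarnClusterMetrics metrics = client.getYarnClusterMetrics();
      System.out.println("NodeManagers across subclusters: "
          + metrics.getNumNodeManagers());
    } finally {
      client.stop();
    }
  }
}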
@@ -3315,6 +3315,11 @@ public static boolean isAclEnabled(Configuration conf) {
 
   public static final String ROUTER_WEBAPP_PREFIX = ROUTER_PREFIX + "webapp.";
 
+  public static final String ROUTER_USER_CLIENT_THREADS_SIZE =
+      ROUTER_PREFIX + "interceptor.user.threadpool-size";
+
+  public static final int DEFAULT_ROUTER_USER_CLIENT_THREADS_SIZE = 5;
+
   /** The address of the Router web application. */
   public static final String ROUTER_WEBAPP_ADDRESS =
       ROUTER_WEBAPP_PREFIX + "address";
@@ -3470,6 +3470,14 @@
     <value>org.apache.hadoop.yarn.server.router.clientrm.DefaultClientRequestInterceptor</value>
   </property>
 
+  <property>
+    <description>
+      The thread pool executor size of per user in Router ClientRM Service FederationClientInterceptor.
+    </description>
+    <name>yarn.router.interceptor.user.threadpool-size</name>
+    <value>5</value>
+  </property>
+
   <property>
     <description>
       Size of LRU cache for Router ClientRM Service and RMAdmin Service.
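The same knob can also be read or overridden programmatically through the constants added in the YarnConfiguration hunk above. A small sketch (the class name is illustrative):

import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class RouterThreadPoolConfig {
  public static void main(String[] args) {
    YarnConfiguration conf = new YarnConfiguration();
    // Bump the per-user interceptor pool from the default of 5 to 10.
    conf.setInt(YarnConfiguration.ROUTER_USER_CLIENT_THREADS_SIZE, 10);

    int numThreads = conf.getInt(
        YarnConfiguration.ROUTER_USER_CLIENT_THREADS_SIZE,
        YarnConfiguration.DEFAULT_ROUTER_USER_CLIENT_THREADS_SIZE);
    System.out.println("Threads per user: " + numThreads);
  }
}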
@@ -126,6 +126,7 @@
 import org.apache.hadoop.yarn.api.records.UpdatedContainer;
 import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
+import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
 import org.apache.hadoop.yarn.client.AMRMClientUtils;
 import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException;
 import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException;
@@ -515,8 +516,8 @@ public GetClusterMetricsResponse getClusterMetrics(
       GetClusterMetricsRequest request) throws YarnException, IOException {
 
     validateRunning();
-
-    return GetClusterMetricsResponse.newInstance(null);
+    YarnClusterMetrics clusterMetrics = YarnClusterMetrics.newInstance(1);
+    return GetClusterMetricsResponse.newInstance(clusterMetrics);
   }
 
   @Override
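With this change the mocked ResourceManager reports one NodeManager per subcluster instead of a null metrics object, so the metrics merged through the Router should equal the number of mocked subclusters (for example, 4 subclusters yield 4 NodeManagers), which is exactly what the new testGetClusterMetricsRequest further below asserts.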
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.router.clientrm;
+
+import java.io.IOException;
+import java.util.Arrays;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Class to define client method,params and arguments.
+ */
+public class ClientMethod {
+
+  private static final Logger LOG = LoggerFactory.getLogger(ClientMethod.class);
+  /**
+   * List of parameters: static and dynamic values, matchings types.
+   */
+  private final Object[] params;
+  /**
+   * List of method parameters types, matches parameters.
+   */
+  private final Class<?>[] types;
+  /**
+   * String name of the method.
+   */
+  private final String methodName;
+
+  public ClientMethod(String method, Class<?>[] pTypes, Object... pParams)
+      throws IOException {
+    if (pParams.length != pTypes.length) {
+      throw new IOException("Invalid parameters for method " + method);
+    }
+
+    this.params = pParams;
+    this.types = Arrays.copyOf(pTypes, pTypes.length);
+    this.methodName = method;
+  }
+
+  public Object[] getParams() {
+    return Arrays.copyOf(this.params, this.params.length);
+  }
+
+  public String getMethodName() {
+    return methodName;
+  }
+
+  /**
+   * Get the calling types for this method.
+   *
+   * @return An array of calling types.
+   */
+  public Class<?>[] getTypes() {
+    return Arrays.copyOf(this.types, this.types.length);
+  }
+}
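ClientMethod is just a value holder that captures a call as data (method name, parameter types, arguments) so it can be replayed reflectively against a protocol proxy, as the interceptor's invokeConcurrent below does per subcluster. A hedged, self-contained sketch of that consumption pattern; the Greeter interface is purely illustrative, in the router the target is ApplicationClientProtocol:

import java.lang.reflect.Method;
import org.apache.hadoop.yarn.server.router.clientrm.ClientMethod;

public class ClientMethodSketch {

  // Illustrative target interface; the router uses ApplicationClientProtocol.
  public interface Greeter {
    String greet(String name);
  }

  public static void main(String[] args) throws Exception {
    Greeter greeter = name -> "Hello " + name;

    // Capture the call as data: method name, parameter types, arguments.
    ClientMethod remoteMethod = new ClientMethod("greet",
        new Class[] {String.class}, new Object[] {"router"});

    // Resolve and invoke it reflectively, the same way invokeConcurrent
    // dispatches the captured call to each subcluster's protocol proxy.
    Method method = Greeter.class
        .getDeclaredMethod(remoteMethod.getMethodName(), remoteMethod.getTypes());
    Object result = method.invoke(greeter, remoteMethod.getParams());
    System.out.println(result); // Hello router
  }
}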
@@ -18,13 +18,24 @@
 
 package org.apache.hadoop.yarn.server.router.clientrm;
 
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import java.io.IOException;
+import java.lang.reflect.Method;
 import java.util.ArrayList;
+import java.util.List;
 import java.util.Map;
 import java.util.Random;
+import java.util.TreeMap;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentHashMap;
 
 import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.ThreadFactory;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
 import org.apache.commons.lang3.NotImplementedException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
@@ -140,6 +151,7 @@ public class FederationClientInterceptor
   private Random rand;
   private RouterPolicyFacade policyFacade;
   private RouterMetrics routerMetrics;
+  private ThreadPoolExecutor executorService;
   private final Clock clock = new MonotonicClock();
 
   @Override
@@ -149,6 +161,17 @@ public void init(String userName) {
     federationFacade = FederationStateStoreFacade.getInstance();
     rand = new Random(System.currentTimeMillis());
 
+
+    int numThreads = getConf().getInt(
+        YarnConfiguration.ROUTER_USER_CLIENT_THREADS_SIZE,
+        YarnConfiguration.DEFAULT_ROUTER_USER_CLIENT_THREADS_SIZE);
+    ThreadFactory threadFactory = new ThreadFactoryBuilder()
+        .setNameFormat("RPC Router Client-" + userName + "-%d ").build();
+
+    BlockingQueue workQueue = new LinkedBlockingQueue<>();
+    this.executorService = new ThreadPoolExecutor(numThreads, numThreads,
+        0L, TimeUnit.MILLISECONDS, workQueue, threadFactory);
+
     final Configuration conf = this.getConf();
 
     try {
@@ -570,7 +593,72 @@ public GetApplicationsResponse getApplications(GetApplicationsRequest request)
   @Override
   public GetClusterMetricsResponse getClusterMetrics(
       GetClusterMetricsRequest request) throws YarnException, IOException {
-    throw new NotImplementedException("Code is not implemented");
+    Map<SubClusterId, SubClusterInfo> subclusters =
+        federationFacade.getSubClusters(true);
+    ClientMethod remoteMethod = new ClientMethod("getClusterMetrics",
+        new Class[] {GetClusterMetricsRequest.class}, new Object[] {request});
+    ArrayList<SubClusterId> clusterList = new ArrayList<>(subclusters.keySet());
+    Map<SubClusterId, GetClusterMetricsResponse> clusterMetrics =
+        invokeConcurrent(clusterList, remoteMethod,
+            GetClusterMetricsResponse.class);
+    return RouterYarnClientUtils.merge(clusterMetrics.values());
   }
 
+  <R> Map<SubClusterId, R> invokeConcurrent(ArrayList<SubClusterId> clusterIds,
+      ClientMethod request, Class<R> clazz) throws YarnException, IOException {
+    List<Callable<Object>> callables = new ArrayList<>();
+    List<Future<Object>> futures = new ArrayList<>();
+    Map<SubClusterId, IOException> exceptions = new TreeMap<>();
+    for (SubClusterId subClusterId : clusterIds) {
+      callables.add(new Callable<Object>() {
+        @Override
+        public Object call() throws Exception {
+          ApplicationClientProtocol protocol =
+              getClientRMProxyForSubCluster(subClusterId);
+          Method method = ApplicationClientProtocol.class
+              .getDeclaredMethod(request.getMethodName(), request.getTypes());
+          return method.invoke(protocol, request.getParams());
+        }
+      });
+    }
+    Map<SubClusterId, R> results = new TreeMap<>();
+    try {
+      futures.addAll(executorService.invokeAll(callables));
+      for (int i = 0; i < futures.size(); i++) {
+        SubClusterId subClusterId = clusterIds.get(i);
+        try {
+          Future<Object> future = futures.get(i);
+          Object result = future.get();
+          results.put(subClusterId, clazz.cast(result));
+        } catch (ExecutionException ex) {
+          Throwable cause = ex.getCause();
+          LOG.debug("Cannot execute {} on {}: {}", request.getMethodName(),
+              subClusterId.getId(), cause.getMessage());
+          IOException ioe;
+          if (cause instanceof IOException) {
+            ioe = (IOException) cause;
+          } else if (cause instanceof YarnException) {
+            throw (YarnException) cause;
+          } else {
+            ioe = new IOException(
+                "Unhandled exception while calling " + request.getMethodName()
+                    + ": " + cause.getMessage(), cause);
+          }
+          // Store the exceptions
+          exceptions.put(subClusterId, ioe);
+        }
+      }
+      if (results.isEmpty()) {
+        SubClusterId subClusterId = clusterIds.get(0);
+        IOException ioe = exceptions.get(subClusterId);
+        if (ioe != null) {
+          throw ioe;
+        }
+      }
+    } catch (InterruptedException e) {
+      throw new YarnException(e);
+    }
+    return results;
+  }
+
   @Override
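The fan-out in invokeConcurrent is plain java.util.concurrent plumbing: one Callable per subcluster resolves the captured protocol method reflectively, all callables go through the per-user ThreadPoolExecutor via invokeAll, and per-subcluster failures are recorded instead of failing the whole call (unless no subcluster answered at all). A minimal, self-contained sketch of that pattern under toy assumptions, with made-up subcluster ids and no YARN types:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class FanOutSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical subcluster ids standing in for SubClusterId.
    List<String> subClusters = Arrays.asList("sc-1", "sc-2", "sc-3");
    ExecutorService pool = Executors.newFixedThreadPool(subClusters.size());

    List<Callable<Integer>> callables = new ArrayList<>();
    for (String id : subClusters) {
      // One callable per subcluster, e.g. "ask sc-N for its NodeManager count".
      callables.add(() -> {
        if (id.equals("sc-2")) {
          throw new IllegalStateException("subcluster unreachable");
        }
        return 10; // pretend each healthy subcluster reports 10 NMs
      });
    }

    Map<String, Integer> results = new HashMap<>();
    List<Future<Integer>> futures = pool.invokeAll(callables);
    for (int i = 0; i < futures.size(); i++) {
      try {
        results.put(subClusters.get(i), futures.get(i).get());
      } catch (ExecutionException ex) {
        // Mirror the router's behaviour: record the failure, keep going.
        System.err.println(subClusters.get(i) + " failed: "
            + ex.getCause().getMessage());
      }
    }
    System.out.println("Merged NM count: "
        + results.values().stream().mapToInt(Integer::intValue).sum());
    pool.shutdown();
  }
}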
@@ -732,4 +820,10 @@ public GetAllResourceTypeInfoResponse getResourceTypeInfo(
       GetAllResourceTypeInfoRequest request) throws YarnException, IOException {
     throw new NotImplementedException("Code is not implemented");
   }
+
+  @Override
+  public void shutdown() {
+    executorService.shutdown();
+    super.shutdown();
+  }
 }
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.router.clientrm;
+
+import java.util.Collection;
+import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse;
+import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
+
+/**
+ * Util class for Router Yarn client API calls.
+ */
+public final class RouterYarnClientUtils {
+
+  private RouterYarnClientUtils() {
+
+  }
+
+  public static GetClusterMetricsResponse merge(
+      Collection<GetClusterMetricsResponse> responses) {
+    YarnClusterMetrics tmp = YarnClusterMetrics.newInstance(0);
+    for (GetClusterMetricsResponse response : responses) {
+      YarnClusterMetrics metrics = response.getClusterMetrics();
+      tmp.setNumNodeManagers(
+          tmp.getNumNodeManagers() + metrics.getNumNodeManagers());
+      tmp.setNumActiveNodeManagers(
+          tmp.getNumActiveNodeManagers() + metrics.getNumActiveNodeManagers());
+      tmp.setNumDecommissionedNodeManagers(
+          tmp.getNumDecommissionedNodeManagers() + metrics
+              .getNumDecommissionedNodeManagers());
+      tmp.setNumLostNodeManagers(
+          tmp.getNumLostNodeManagers() + metrics.getNumLostNodeManagers());
+      tmp.setNumRebootedNodeManagers(tmp.getNumRebootedNodeManagers() + metrics
+          .getNumRebootedNodeManagers());
+      tmp.setNumUnhealthyNodeManagers(
+          tmp.getNumUnhealthyNodeManagers() + metrics
+              .getNumUnhealthyNodeManagers());
+    }
+    return GetClusterMetricsResponse.newInstance(tmp);
+  }
+}
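Note that merge only aggregates the NodeManager counters carried by YarnClusterMetrics (total, active, decommissioned, lost, rebooted, unhealthy): it sums them across whatever subcluster responses it is handed, so a subcluster that failed and was skipped by invokeConcurrent simply contributes nothing to the totals.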
@@ -24,6 +24,8 @@
 
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
@@ -130,7 +132,7 @@ protected YarnConfiguration createConfiguration() {
   @Test
   public void testGetNewApplication()
       throws YarnException, IOException, InterruptedException {
-    System.out.println("Test FederationClientInterceptor: Get New Application");
+    LOG.info("Test FederationClientInterceptor: Get New Application");
 
     GetNewApplicationRequest request = GetNewApplicationRequest.newInstance();
     GetNewApplicationResponse response = interceptor.getNewApplication(request);
@@ -149,7 +151,7 @@ public void testGetNewApplication()
   @Test
   public void testSubmitApplication()
       throws YarnException, IOException, InterruptedException {
-    System.out.println("Test FederationClientInterceptor: Submit Application");
+    LOG.info("Test FederationClientInterceptor: Submit Application");
 
     ApplicationId appId =
         ApplicationId.newInstance(System.currentTimeMillis(), 1);
@@ -174,7 +176,7 @@ public void testSubmitApplication()
   @Test
   public void testSubmitApplicationMultipleSubmission()
       throws YarnException, IOException, InterruptedException {
-    System.out.println(
+    LOG.info(
         "Test FederationClientInterceptor: Submit Application - Multiple");
 
     ApplicationId appId =
@@ -207,7 +209,7 @@ public void testSubmitApplicationMultipleSubmission()
   @Test
   public void testSubmitApplicationEmptyRequest()
       throws YarnException, IOException, InterruptedException {
-    System.out.println(
+    LOG.info(
         "Test FederationClientInterceptor: Submit Application - Empty");
     try {
       interceptor.submitApplication(null);
@@ -246,8 +248,7 @@ public void testSubmitApplicationEmptyRequest()
   @Test
   public void testForceKillApplication()
       throws YarnException, IOException, InterruptedException {
-    System.out
-        .println("Test FederationClientInterceptor: Force Kill Application");
+    LOG.info("Test FederationClientInterceptor: Force Kill Application");
 
     ApplicationId appId =
         ApplicationId.newInstance(System.currentTimeMillis(), 1);
@@ -276,7 +277,7 @@ public void testForceKillApplication()
   @Test
   public void testForceKillApplicationNotExists()
       throws YarnException, IOException, InterruptedException {
-    System.out.println("Test FederationClientInterceptor: "
+    LOG.info("Test FederationClientInterceptor: "
        + "Force Kill Application - Not Exists");
 
     ApplicationId appId =
@@ -299,7 +300,7 @@ public void testForceKillApplicationNotExists()
   @Test
   public void testForceKillApplicationEmptyRequest()
       throws YarnException, IOException, InterruptedException {
-    System.out.println(
+    LOG.info(
        "Test FederationClientInterceptor: Force Kill Application - Empty");
     try {
       interceptor.forceKillApplication(null);
@@ -325,8 +326,7 @@ public void testForceKillApplicationEmptyRequest()
   @Test
   public void testGetApplicationReport()
       throws YarnException, IOException, InterruptedException {
-    System.out
-        .println("Test FederationClientInterceptor: Get Application Report");
+    LOG.info("Test FederationClientInterceptor: Get Application Report");
 
     ApplicationId appId =
         ApplicationId.newInstance(System.currentTimeMillis(), 1);
@@ -357,7 +357,7 @@ public void testGetApplicationReport()
   @Test
   public void testGetApplicationNotExists()
       throws YarnException, IOException, InterruptedException {
-    System.out.println(
+    LOG.info(
        "Test ApplicationClientProtocol: Get Application Report - Not Exists");
     ApplicationId appId =
         ApplicationId.newInstance(System.currentTimeMillis(), 1);
@@ -379,7 +379,7 @@ public void testGetApplicationNotExists()
   @Test
   public void testGetApplicationEmptyRequest()
       throws YarnException, IOException, InterruptedException {
-    System.out.println(
+    LOG.info(
        "Test FederationClientInterceptor: Get Application Report - Empty");
     try {
       interceptor.getApplicationReport(null);
@@ -400,4 +400,17 @@ public void testGetApplicationEmptyRequest()
     }
   }
 
+  @Test
+  public void testGetClusterMetricsRequest() throws YarnException, IOException {
+    LOG.info("Test FederationClientInterceptor : Get Cluster Metrics request");
+    // null request
+    GetClusterMetricsResponse response = interceptor.getClusterMetrics(null);
+    Assert.assertEquals(subClusters.size(),
+        response.getClusterMetrics().getNumNodeManagers());
+    // normal request.
+    response =
+        interceptor.getClusterMetrics(GetClusterMetricsRequest.newInstance());
+    Assert.assertEquals(subClusters.size(),
+        response.getClusterMetrics().getNumNodeManagers());
+  }
 }
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.router.clientrm;
+
+import java.util.ArrayList;
+import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse;
+import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Test class for RouterYarnClientUtils.
+ */
+public class TestRouterYarnClientUtils {
+
+  @Test
+  public void testClusterMetricsMerge() {
+    ArrayList<GetClusterMetricsResponse> responses = new ArrayList<>();
+    responses.add(getClusterMetricsResponse(1));
+    responses.add(getClusterMetricsResponse(2));
+    GetClusterMetricsResponse result = RouterYarnClientUtils.merge(responses);
+    YarnClusterMetrics resultMetrics = result.getClusterMetrics();
+    Assert.assertEquals(3, resultMetrics.getNumNodeManagers());
+    Assert.assertEquals(3, resultMetrics.getNumActiveNodeManagers());
+    Assert.assertEquals(3, resultMetrics.getNumDecommissionedNodeManagers());
+    Assert.assertEquals(3, resultMetrics.getNumLostNodeManagers());
+    Assert.assertEquals(3, resultMetrics.getNumRebootedNodeManagers());
+    Assert.assertEquals(3, resultMetrics.getNumUnhealthyNodeManagers());
+  }
+
+  public GetClusterMetricsResponse getClusterMetricsResponse(int value) {
+    YarnClusterMetrics metrics = YarnClusterMetrics.newInstance(value);
+    metrics.setNumUnhealthyNodeManagers(value);
+    metrics.setNumRebootedNodeManagers(value);
+    metrics.setNumLostNodeManagers(value);
+    metrics.setNumDecommissionedNodeManagers(value);
+    metrics.setNumActiveNodeManagers(value);
+    metrics.setNumNodeManagers(value);
+    return GetClusterMetricsResponse.newInstance(metrics);
+  }
+}