YARN-8699. Add Yarnclient#yarnclusterMetrics API implementation in router. Contributed by Bibin A Chundatt.

This commit is contained in:
Giovanni Matteo Fumarola 2018-09-07 11:32:03 -07:00
parent ab90248b30
commit 3dc2988a37
8 changed files with 319 additions and 15 deletions

View File

@ -3315,6 +3315,11 @@ public static boolean isAclEnabled(Configuration conf) {
public static final String ROUTER_WEBAPP_PREFIX = ROUTER_PREFIX + "webapp.";
public static final String ROUTER_USER_CLIENT_THREADS_SIZE =
ROUTER_PREFIX + "interceptor.user.threadpool-size";
public static final int DEFAULT_ROUTER_USER_CLIENT_THREADS_SIZE = 5;
/** The address of the Router web application. */
public static final String ROUTER_WEBAPP_ADDRESS =
ROUTER_WEBAPP_PREFIX + "address";

View File

@ -3470,6 +3470,14 @@
<value>org.apache.hadoop.yarn.server.router.clientrm.DefaultClientRequestInterceptor</value>
</property>
<property>
<description>
The size of the per-user thread pool executor used by the Router ClientRM Service FederationClientInterceptor.
</description>
<name>yarn.router.interceptor.user.threadpool-size</name>
<value>5</value>
</property>
<property>
<description>
Size of LRU cache for Router ClientRM Service and RMAdmin Service.

View File

@ -126,6 +126,7 @@
import org.apache.hadoop.yarn.api.records.UpdatedContainer;
import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
import org.apache.hadoop.yarn.client.AMRMClientUtils;
import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException;
import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException;
@ -515,8 +516,8 @@ public GetClusterMetricsResponse getClusterMetrics(
GetClusterMetricsRequest request) throws YarnException, IOException {
validateRunning();
return GetClusterMetricsResponse.newInstance(null);
YarnClusterMetrics clusterMetrics = YarnClusterMetrics.newInstance(1);
return GetClusterMetricsResponse.newInstance(clusterMetrics);
}
@Override

View File

@ -0,0 +1,71 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.router.clientrm;
import java.io.IOException;
import java.util.Arrays;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Describes a remote client method invocation: the method name, the
 * parameter types, and the argument values to invoke it with.
 */
public class ClientMethod {
  /**
   * Argument values to pass to the method; matches {@link #types} by
   * position.
   */
  private final Object[] params;
  /**
   * Parameter types of the method; matches {@link #params} by position.
   */
  private final Class<?>[] types;
  /**
   * Name of the method to invoke.
   */
  private final String methodName;

  /**
   * Creates a descriptor for a client method invocation.
   *
   * @param method name of the method to invoke
   * @param pTypes parameter types, one per argument
   * @param pParams argument values, one per parameter type
   * @throws IOException if the number of arguments does not match the
   *           number of parameter types
   */
  public ClientMethod(String method, Class<?>[] pTypes, Object... pParams)
      throws IOException {
    if (pParams.length != pTypes.length) {
      throw new IOException("Invalid parameters for method " + method);
    }
    // Defensively copy BOTH arrays so later mutation of the caller's
    // arrays cannot change this descriptor. (The original copied only
    // pTypes and aliased pParams, which was inconsistent with the
    // copying getters below.)
    this.params = Arrays.copyOf(pParams, pParams.length);
    this.types = Arrays.copyOf(pTypes, pTypes.length);
    this.methodName = method;
  }

  /**
   * Get the argument values for this method.
   *
   * @return a copy of the argument array.
   */
  public Object[] getParams() {
    return Arrays.copyOf(this.params, this.params.length);
  }

  /**
   * Get the name of the method to invoke.
   *
   * @return the method name.
   */
  public String getMethodName() {
    return methodName;
  }

  /**
   * Get the calling types for this method.
   *
   * @return a copy of the array of calling types.
   */
  public Class<?>[] getTypes() {
    return Arrays.copyOf(this.types, this.types.length);
  }
}

View File

@ -18,13 +18,24 @@
package org.apache.hadoop.yarn.server.router.clientrm;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.io.IOException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
@ -140,6 +151,7 @@ public class FederationClientInterceptor
private Random rand;
private RouterPolicyFacade policyFacade;
private RouterMetrics routerMetrics;
private ThreadPoolExecutor executorService;
private final Clock clock = new MonotonicClock();
@Override
@ -149,6 +161,17 @@ public void init(String userName) {
federationFacade = FederationStateStoreFacade.getInstance();
rand = new Random(System.currentTimeMillis());
int numThreads = getConf().getInt(
YarnConfiguration.ROUTER_USER_CLIENT_THREADS_SIZE,
YarnConfiguration.DEFAULT_ROUTER_USER_CLIENT_THREADS_SIZE);
ThreadFactory threadFactory = new ThreadFactoryBuilder()
.setNameFormat("RPC Router Client-" + userName + "-%d ").build();
BlockingQueue workQueue = new LinkedBlockingQueue<>();
this.executorService = new ThreadPoolExecutor(numThreads, numThreads,
0L, TimeUnit.MILLISECONDS, workQueue, threadFactory);
final Configuration conf = this.getConf();
try {
@ -570,7 +593,72 @@ public GetApplicationsResponse getApplications(GetApplicationsRequest request)
/**
 * Returns federation-wide cluster metrics by fanning the request out to
 * every active sub-cluster and summing the per-cluster results.
 */
@Override
public GetClusterMetricsResponse getClusterMetrics(
    GetClusterMetricsRequest request) throws YarnException, IOException {
  // Only active sub-clusters participate in the aggregation.
  Map<SubClusterId, SubClusterInfo> activeSubClusters =
      federationFacade.getSubClusters(true);
  ArrayList<SubClusterId> subClusterIds =
      new ArrayList<>(activeSubClusters.keySet());
  ClientMethod invocation = new ClientMethod("getClusterMetrics",
      new Class[] {GetClusterMetricsRequest.class}, new Object[] {request});
  Map<SubClusterId, GetClusterMetricsResponse> perClusterMetrics =
      invokeConcurrent(subClusterIds, invocation,
          GetClusterMetricsResponse.class);
  return RouterYarnClientUtils.merge(perClusterMetrics.values());
}
/**
 * Invokes the method described by {@code request} concurrently on every
 * sub-cluster in {@code clusterIds} using the per-user thread pool, and
 * collects the per-sub-cluster results.
 *
 * Semantics visible in this implementation:
 * - the returned map contains entries only for sub-clusters whose call
 *   succeeded;
 * - a {@link YarnException} raised by any sub-cluster call is rethrown
 *   immediately;
 * - per-sub-cluster {@link IOException}s are recorded, and if NO
 *   sub-cluster succeeded, the exception recorded for the FIRST
 *   sub-cluster (if any) is rethrown; otherwise an empty map is returned.
 *
 * @param clusterIds sub-clusters to invoke the method on
 * @param request method name, parameter types and argument values
 * @param clazz expected result type, used to cast each reflective result
 * @param <R> result type of the remote call
 * @return map from sub-cluster id to its successful result
 * @throws YarnException if any call failed with a YarnException, or the
 *           invoking thread was interrupted
 * @throws IOException if all calls failed and the first sub-cluster
 *           recorded an IOException
 */
<R> Map<SubClusterId, R> invokeConcurrent(ArrayList<SubClusterId> clusterIds,
ClientMethod request, Class<R> clazz) throws YarnException, IOException {
List<Callable<Object>> callables = new ArrayList<>();
List<Future<Object>> futures = new ArrayList<>();
Map<SubClusterId, IOException> exceptions = new TreeMap<>();
// Build one callable per sub-cluster; each resolves the target method by
// reflection and invokes it on that sub-cluster's RM proxy.
for (SubClusterId subClusterId : clusterIds) {
callables.add(new Callable<Object>() {
@Override
public Object call() throws Exception {
ApplicationClientProtocol protocol =
getClientRMProxyForSubCluster(subClusterId);
Method method = ApplicationClientProtocol.class
.getDeclaredMethod(request.getMethodName(), request.getTypes());
return method.invoke(protocol, request.getParams());
}
});
}
Map<SubClusterId, R> results = new TreeMap<>();
try {
// invokeAll blocks until every callable has completed (or failed).
futures.addAll(executorService.invokeAll(callables));
// Futures come back in submission order, so index i corresponds to
// clusterIds.get(i).
for (int i = 0; i < futures.size(); i++) {
SubClusterId subClusterId = clusterIds.get(i);
try {
Future<Object> future = futures.get(i);
Object result = future.get();
results.put(subClusterId, clazz.cast(result));
} catch (ExecutionException ex) {
// The reflective call failed; unwrap the real cause.
Throwable cause = ex.getCause();
LOG.debug("Cannot execute {} on {}: {}", request.getMethodName(),
subClusterId.getId(), cause.getMessage());
IOException ioe;
if (cause instanceof IOException) {
ioe = (IOException) cause;
} else if (cause instanceof YarnException) {
// YarnException aborts the whole aggregation immediately.
throw (YarnException) cause;
} else {
ioe = new IOException(
"Unhandled exception while calling " + request.getMethodName()
+ ": " + cause.getMessage(), cause);
}
// Store the exceptions
exceptions.put(subClusterId, ioe);
}
}
// If no sub-cluster succeeded, surface the failure recorded for the
// first sub-cluster (if any) instead of silently returning empty.
if (results.isEmpty()) {
SubClusterId subClusterId = clusterIds.get(0);
IOException ioe = exceptions.get(subClusterId);
if (ioe != null) {
throw ioe;
}
}
} catch (InterruptedException e) {
throw new YarnException(e);
}
return results;
}
@Override
@ -732,4 +820,10 @@ public GetAllResourceTypeInfoResponse getResourceTypeInfo(
GetAllResourceTypeInfoRequest request) throws YarnException, IOException {
throw new NotImplementedException("Code is not implemented");
}
/**
 * Shuts down the per-user thread pool used for concurrent sub-cluster
 * calls, then delegates to the parent interceptor's shutdown.
 */
@Override
public void shutdown() {
// Orderly shutdown: stop accepting new tasks; queued tasks still run.
executorService.shutdown();
super.shutdown();
}
}

View File

@ -0,0 +1,55 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.router.clientrm;
import java.util.Collection;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse;
import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
/**
 * Utility methods for merging per-sub-cluster responses for Router Yarn
 * client API calls.
 */
public final class RouterYarnClientUtils {

  private RouterYarnClientUtils() {
    // Utility class: no instances.
  }

  /**
   * Aggregates several {@link GetClusterMetricsResponse}s into one whose
   * node-manager counters are the sums of the individual counters.
   *
   * @param responses per-sub-cluster metrics responses
   * @return a response carrying the summed cluster metrics
   */
  public static GetClusterMetricsResponse merge(
      Collection<GetClusterMetricsResponse> responses) {
    // Accumulate each counter locally, then build the merged record once.
    int numNodeManagers = 0;
    int numActive = 0;
    int numDecommissioned = 0;
    int numLost = 0;
    int numRebooted = 0;
    int numUnhealthy = 0;
    for (GetClusterMetricsResponse response : responses) {
      YarnClusterMetrics metrics = response.getClusterMetrics();
      numNodeManagers += metrics.getNumNodeManagers();
      numActive += metrics.getNumActiveNodeManagers();
      numDecommissioned += metrics.getNumDecommissionedNodeManagers();
      numLost += metrics.getNumLostNodeManagers();
      numRebooted += metrics.getNumRebootedNodeManagers();
      numUnhealthy += metrics.getNumUnhealthyNodeManagers();
    }
    YarnClusterMetrics merged = YarnClusterMetrics.newInstance(numNodeManagers);
    merged.setNumActiveNodeManagers(numActive);
    merged.setNumDecommissionedNodeManagers(numDecommissioned);
    merged.setNumLostNodeManagers(numLost);
    merged.setNumRebootedNodeManagers(numRebooted);
    merged.setNumUnhealthyNodeManagers(numUnhealthy);
    return GetClusterMetricsResponse.newInstance(merged);
  }
}

View File

@ -24,6 +24,8 @@
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
@ -130,7 +132,7 @@ protected YarnConfiguration createConfiguration() {
@Test
public void testGetNewApplication()
throws YarnException, IOException, InterruptedException {
System.out.println("Test FederationClientInterceptor: Get New Application");
LOG.info("Test FederationClientInterceptor: Get New Application");
GetNewApplicationRequest request = GetNewApplicationRequest.newInstance();
GetNewApplicationResponse response = interceptor.getNewApplication(request);
@ -149,7 +151,7 @@ public void testGetNewApplication()
@Test
public void testSubmitApplication()
throws YarnException, IOException, InterruptedException {
System.out.println("Test FederationClientInterceptor: Submit Application");
LOG.info("Test FederationClientInterceptor: Submit Application");
ApplicationId appId =
ApplicationId.newInstance(System.currentTimeMillis(), 1);
@ -174,7 +176,7 @@ public void testSubmitApplication()
@Test
public void testSubmitApplicationMultipleSubmission()
throws YarnException, IOException, InterruptedException {
System.out.println(
LOG.info(
"Test FederationClientInterceptor: Submit Application - Multiple");
ApplicationId appId =
@ -207,7 +209,7 @@ public void testSubmitApplicationMultipleSubmission()
@Test
public void testSubmitApplicationEmptyRequest()
throws YarnException, IOException, InterruptedException {
System.out.println(
LOG.info(
"Test FederationClientInterceptor: Submit Application - Empty");
try {
interceptor.submitApplication(null);
@ -246,8 +248,7 @@ public void testSubmitApplicationEmptyRequest()
@Test
public void testForceKillApplication()
throws YarnException, IOException, InterruptedException {
System.out
.println("Test FederationClientInterceptor: Force Kill Application");
LOG.info("Test FederationClientInterceptor: Force Kill Application");
ApplicationId appId =
ApplicationId.newInstance(System.currentTimeMillis(), 1);
@ -276,7 +277,7 @@ public void testForceKillApplication()
@Test
public void testForceKillApplicationNotExists()
throws YarnException, IOException, InterruptedException {
System.out.println("Test FederationClientInterceptor: "
LOG.info("Test FederationClientInterceptor: "
+ "Force Kill Application - Not Exists");
ApplicationId appId =
@ -299,7 +300,7 @@ public void testForceKillApplicationNotExists()
@Test
public void testForceKillApplicationEmptyRequest()
throws YarnException, IOException, InterruptedException {
System.out.println(
LOG.info(
"Test FederationClientInterceptor: Force Kill Application - Empty");
try {
interceptor.forceKillApplication(null);
@ -325,8 +326,7 @@ public void testForceKillApplicationEmptyRequest()
@Test
public void testGetApplicationReport()
throws YarnException, IOException, InterruptedException {
System.out
.println("Test FederationClientInterceptor: Get Application Report");
LOG.info("Test FederationClientInterceptor: Get Application Report");
ApplicationId appId =
ApplicationId.newInstance(System.currentTimeMillis(), 1);
@ -357,7 +357,7 @@ public void testGetApplicationReport()
@Test
public void testGetApplicationNotExists()
throws YarnException, IOException, InterruptedException {
System.out.println(
LOG.info(
"Test ApplicationClientProtocol: Get Application Report - Not Exists");
ApplicationId appId =
ApplicationId.newInstance(System.currentTimeMillis(), 1);
@ -379,7 +379,7 @@ public void testGetApplicationNotExists()
@Test
public void testGetApplicationEmptyRequest()
throws YarnException, IOException, InterruptedException {
System.out.println(
LOG.info(
"Test FederationClientInterceptor: Get Application Report - Empty");
try {
interceptor.getApplicationReport(null);
@ -400,4 +400,17 @@ public void testGetApplicationEmptyRequest()
}
}
@Test
public void testGetClusterMetricsRequest() throws YarnException, IOException {
  LOG.info("Test FederationClientInterceptor : Get Cluster Metrics request");
  // The mock RM reports one NodeManager per sub-cluster, so the merged
  // count must equal the number of sub-clusters. A null request is still
  // fanned out to every sub-cluster.
  GetClusterMetricsResponse nullRequestResponse =
      interceptor.getClusterMetrics(null);
  Assert.assertEquals(subClusters.size(),
      nullRequestResponse.getClusterMetrics().getNumNodeManagers());
  // A regular, non-null request yields the same aggregate.
  GetClusterMetricsResponse normalRequestResponse =
      interceptor.getClusterMetrics(GetClusterMetricsRequest.newInstance());
  Assert.assertEquals(subClusters.size(),
      normalRequestResponse.getClusterMetrics().getNumNodeManagers());
}
}

View File

@ -0,0 +1,57 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.router.clientrm;
import java.util.ArrayList;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse;
import org.apache.hadoop.yarn.api.records.YarnClusterMetrics;
import org.junit.Assert;
import org.junit.Test;
/**
 * Unit tests for {@link RouterYarnClientUtils}.
 */
public class TestRouterYarnClientUtils {

  /**
   * Merging responses with counters 1 and 2 must yield 3 for every
   * node-manager counter.
   */
  @Test
  public void testClusterMetricsMerge() {
    ArrayList<GetClusterMetricsResponse> perClusterResponses =
        new ArrayList<>();
    for (int value : new int[] {1, 2}) {
      perClusterResponses.add(getClusterMetricsResponse(value));
    }
    GetClusterMetricsResponse result =
        RouterYarnClientUtils.merge(perClusterResponses);
    YarnClusterMetrics merged = result.getClusterMetrics();
    Assert.assertEquals(3, merged.getNumNodeManagers());
    Assert.assertEquals(3, merged.getNumActiveNodeManagers());
    Assert.assertEquals(3, merged.getNumDecommissionedNodeManagers());
    Assert.assertEquals(3, merged.getNumLostNodeManagers());
    Assert.assertEquals(3, merged.getNumRebootedNodeManagers());
    Assert.assertEquals(3, merged.getNumUnhealthyNodeManagers());
  }

  /**
   * Builds a response whose every node-manager counter equals
   * {@code value}.
   *
   * @param value the count to set on every metric
   * @return a single-cluster metrics response
   */
  public GetClusterMetricsResponse getClusterMetricsResponse(int value) {
    YarnClusterMetrics singleClusterMetrics =
        YarnClusterMetrics.newInstance(value);
    singleClusterMetrics.setNumNodeManagers(value);
    singleClusterMetrics.setNumActiveNodeManagers(value);
    singleClusterMetrics.setNumDecommissionedNodeManagers(value);
    singleClusterMetrics.setNumLostNodeManagers(value);
    singleClusterMetrics.setNumRebootedNodeManagers(value);
    singleClusterMetrics.setNumUnhealthyNodeManagers(value);
    return GetClusterMetricsResponse.newInstance(singleClusterMetrics);
  }
}