From 500695d7260689ef77f075c64eef69f684722b29 Mon Sep 17 00:00:00 2001 From: Gour Saha Date: Tue, 21 Feb 2017 08:52:47 -0800 Subject: [PATCH] YARN-6185. Apply SLIDER-1199 to yarn native services for blacklisting nodes. Contributed by Billie Rinaldi --- .../providers/AbstractProviderService.java | 6 +++ .../server/appmaster/SliderAppMaster.java | 8 +++- .../appmaster/actions/ResetFailureWindow.java | 14 +++++- .../operations/AsyncRMOperationHandler.java | 6 +++ .../ProviderNotifyingOperationHandler.java | 8 ++++ .../operations/RMOperationHandlerActions.java | 18 +++++--- .../operations/UpdateBlacklistOperation.java | 45 +++++++++++++++++++ .../server/appmaster/state/AppState.java | 14 ++++++ .../server/appmaster/state/NodeInstance.java | 10 +++++ .../server/appmaster/state/RoleHistory.java | 34 ++++++++++++++ 10 files changed, 155 insertions(+), 8 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/UpdateBlacklistOperation.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java index 00fc606d9b..41b26e9df9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/providers/AbstractProviderService.java @@ -410,6 +410,12 @@ public abstract class AbstractProviderService return 0; } + @Override + public void updateBlacklist(List blacklistAdditions, + List blacklistRemovals) { + // no-op + } + @Override public void execute(List operations) { for (AbstractRMOperation operation : operations) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java index a9a6b6bb91..f67ea58a9e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java @@ -1755,7 +1755,7 @@ public class SliderAppMaster extends AbstractSliderLaunchedService */ private void scheduleFailureWindowResets(ConfTree resources) throws BadConfigException { - ResetFailureWindow reset = new ResetFailureWindow(); + ResetFailureWindow reset = new ResetFailureWindow(rmOperationHandler); ConfTreeOperations ops = new ConfTreeOperations(resources); MapOperations globals = ops.getGlobalOptions(); long seconds = globals.getTimeRange(ResourceKeys.CONTAINER_FAILURE_WINDOW, @@ -1988,6 +1988,12 @@ public class SliderAppMaster extends AbstractSliderLaunchedService rmOperationHandler.cancelSingleRequest(request); } + @Override + public void updateBlacklist(List blacklistAdditions, + List blacklistRemovals) { + rmOperationHandler.updateBlacklist(blacklistAdditions, blacklistRemovals); + } + /* =================================================================== */ /* END */ /* =================================================================== */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ResetFailureWindow.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ResetFailureWindow.java index 28bcf5554f..36f58dd0d6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ResetFailureWindow.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ResetFailureWindow.java @@ -19,21 +19,31 @@ package org.apache.slider.server.appmaster.actions; import org.apache.slider.server.appmaster.SliderAppMaster; +import org.apache.slider.server.appmaster.operations.AbstractRMOperation; +import org.apache.slider.server.appmaster.operations.RMOperationHandlerActions; import org.apache.slider.server.appmaster.state.AppState; /** * Requests the AM to reset the failure window */ public class ResetFailureWindow extends AsyncAction { + private final RMOperationHandlerActions operationHandler; - public ResetFailureWindow() { + public ResetFailureWindow(RMOperationHandlerActions operationHandler) { super("ResetFailureWindow"); + this.operationHandler = operationHandler; } @Override public void execute(SliderAppMaster appMaster, QueueAccess queueService, AppState appState) throws Exception { - appState.resetFailureCounts(); + synchronized (appMaster) { + appState.resetFailureCounts(); + AbstractRMOperation blacklistOperation = appState.updateBlacklist(); + if (blacklistOperation != null) { + blacklistOperation.execute(operationHandler); + } + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java index 03231ef2f8..71733544a9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/AsyncRMOperationHandler.java @@ -107,4 +107,10 @@ public class AsyncRMOperationHandler extends RMOperationHandler { public void addContainerRequest(AMRMClient.ContainerRequest req) { client.addContainerRequest(req); } + + @Override + public void updateBlacklist(List blacklistAdditions, + List blacklistRemovals) { + client.updateBlacklist(blacklistAdditions, blacklistRemovals); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java index 184a36a698..972cc30a1d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/ProviderNotifyingOperationHandler.java @@ -23,6 +23,8 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.client.api.AMRMClient; import org.apache.slider.providers.ProviderService; +import java.util.List; + public class ProviderNotifyingOperationHandler extends RMOperationHandler { private final ProviderService providerService; @@ -52,4 +54,10 @@ public class ProviderNotifyingOperationHandler extends RMOperationHandler { public void cancelSingleRequest(AMRMClient.ContainerRequest request) { providerService.cancelSingleRequest(request); } + + @Override + public void updateBlacklist(List blacklistAdditions, + List blacklistRemovals) { + providerService.updateBlacklist(blacklistAdditions, blacklistRemovals); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java index b7794edd71..bbaa933ce1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/RMOperationHandlerActions.java @@ -27,25 +27,25 @@ import java.util.List; public interface RMOperationHandlerActions { /** - * Release an assigned container + * Release an assigned container. * @param containerId container */ void releaseAssignedContainer(ContainerId containerId); /** - * Issue a container request + * Issue a container request. * @param request */ void addContainerRequest(AMRMClient.ContainerRequest request); /** - * Cancel a specific request + * Cancel a specific request. * @param request request to cancel */ void cancelSingleRequest(AMRMClient.ContainerRequest request); /** - * Remove a container request + * Remove a container request. * @param priority1 priority to remove at * @param priority2 second priority to target * @param count number to remove @@ -53,7 +53,15 @@ public interface RMOperationHandlerActions { int cancelContainerRequests(Priority priority1, Priority priority2, int count); /** - * Execute an entire list of operations + * Blacklist resources. + * @param blacklistAdditions resources to add to the blacklist + * @param blacklistRemovals resources to remove from the blacklist + */ + void updateBlacklist(List blacklistAdditions, + List blacklistRemovals); + + /** + * Execute an entire list of operations. * @param operations ops */ void execute(List operations); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/UpdateBlacklistOperation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/UpdateBlacklistOperation.java new file mode 100644 index 0000000000..90e2e5deee --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/operations/UpdateBlacklistOperation.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.slider.server.appmaster.operations; + +import java.util.List; + +/** + * Update blacklisted resources for the application. + */ +public class UpdateBlacklistOperation extends AbstractRMOperation { + private final List blacklistAdditions; + private final List blacklistRemovals; + + public UpdateBlacklistOperation(List blacklistAdditions, + List blacklistRemovals) { + this.blacklistAdditions = blacklistAdditions; + this.blacklistRemovals = blacklistRemovals; + } + + @Override + public void execute(RMOperationHandlerActions handler) { + handler.updateBlacklist(blacklistAdditions, blacklistRemovals); + } + + @Override + public String toString() { + return "blacklist additions: " + blacklistAdditions + + ", blacklist removals: " + blacklistRemovals; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java index 6db375d9a9..6f54959911 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/state/AppState.java @@ -70,6 +70,7 @@ import org.apache.slider.server.appmaster.management.MetricsConstants; import org.apache.slider.server.appmaster.operations.AbstractRMOperation; import org.apache.slider.server.appmaster.operations.ContainerReleaseOperation; import org.apache.slider.server.appmaster.operations.ContainerRequestOperation; +import org.apache.slider.server.appmaster.operations.UpdateBlacklistOperation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -1934,6 +1935,15 @@ public class AppState { return results; } + public synchronized AbstractRMOperation updateBlacklist() { + UpdateBlacklistOperation blacklistOperation = + roleHistory.updateBlacklist(getRoleStatusMap().values()); + if (blacklistOperation != null) { + log.info("Updating {}", blacklistOperation); + } + return blacklistOperation; + } + /** * Look at where the current node state is -and whether it should be changed */ @@ -1941,6 +1951,10 @@ public class AppState { throws SliderInternalStateException, TriggerClusterTeardownException { log.debug("in reviewRequestAndReleaseNodes()"); List allOperations = new ArrayList<>(); + AbstractRMOperation blacklistOperation = updateBlacklist(); + if (blacklistOperation != null) { + allOperations.add(blacklistOperation); + } for (RoleStatus roleStatus : getRoleStatusMap().values()) { if (!roleStatus.isExcludeFromFlexing()) { List operations = reviewOneRole(roleStatus); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java index fd60d7dc49..120d402ff2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/state/NodeInstance.java @@ -42,6 +42,8 @@ public class NodeInstance { public final String hostname; + private boolean blacklisted = false; + /** * last state of node. Starts off as {@link NodeState#RUNNING}, * on the assumption that it is live. @@ -81,6 +83,14 @@ public class NodeInstance { nodeEntries = new ArrayList<>(roles); } + public synchronized void setBlacklisted(boolean blacklisted) { + this.blacklisted = blacklisted; + } + + public boolean isBlacklisted() { + return blacklisted; + } + /** * Update the node status. * The return code is true if the node state changed enough to diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java index 4e8a4d722d..38c70f373f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-slider/hadoop-yarn-slider-core/src/main/java/org/apache/slider/server/appmaster/state/RoleHistory.java @@ -34,6 +34,7 @@ import org.apache.slider.server.appmaster.management.BoolMetric; import org.apache.slider.server.appmaster.management.MetricsAndMonitoring; import org.apache.slider.server.appmaster.management.Timestamp; import org.apache.slider.server.appmaster.operations.AbstractRMOperation; +import org.apache.slider.server.appmaster.operations.UpdateBlacklistOperation; import org.apache.slider.server.avro.LoadedRoleHistory; import org.apache.slider.server.avro.NodeEntryRecord; import org.apache.slider.server.avro.RoleHistoryHeader; @@ -49,6 +50,7 @@ import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; /** @@ -546,6 +548,38 @@ public class RoleHistory { } } + public synchronized UpdateBlacklistOperation updateBlacklist( + Collection roleStatuses) { + List blacklistAdditions = new ArrayList<>(); + List blacklistRemovals = new ArrayList<>(); + for (Entry nodeInstanceEntry : nodemap.entrySet()) { + boolean shouldBeBlacklisted = false; + String nodeHost = nodeInstanceEntry.getKey(); + NodeInstance nodeInstance = nodeInstanceEntry.getValue(); + for (RoleStatus roleStatus : roleStatuses) { + if (nodeInstance.exceedsFailureThreshold(roleStatus)) { + shouldBeBlacklisted = true; + break; + } + } + if (shouldBeBlacklisted) { + if (!nodeInstance.isBlacklisted()) { + blacklistAdditions.add(nodeHost); + nodeInstance.setBlacklisted(true); + } + } else { + if (nodeInstance.isBlacklisted()) { + blacklistRemovals.add(nodeHost); + nodeInstance.setBlacklisted(false); + } + } + } + if (blacklistAdditions.isEmpty() && blacklistRemovals.isEmpty()) { + return null; + } + return new UpdateBlacklistOperation(blacklistAdditions, blacklistRemovals); + } + /** * Find a node for use * @param role role