From 8c0759d02a9a530cfdd25e0a8f410cd74a8ac4c8 Mon Sep 17 00:00:00 2001 From: Weiwei Yang Date: Thu, 29 Aug 2019 18:14:39 +0800 Subject: [PATCH] YARN-9664. Improve response of scheduler/app activities for better understanding. Contributed by Tao Yang. --- .../activities/ActivitiesLogger.java | 184 ++++--- .../activities/ActivitiesManager.java | 39 +- .../scheduler/activities/ActivitiesUtils.java | 12 +- .../ActivityDiagnosticConstant.java | 112 +++-- .../scheduler/activities/ActivityLevel.java | 29 ++ .../scheduler/activities/ActivityNode.java | 36 +- .../activities/AllocationActivity.java | 46 +- .../scheduler/activities/AppAllocation.java | 42 +- .../scheduler/activities/NodeAllocation.java | 25 +- .../scheduler/capacity/CapacityScheduler.java | 42 +- .../scheduler/capacity/LeafQueue.java | 19 +- .../scheduler/capacity/ParentQueue.java | 48 +- .../allocator/RegularContainerAllocator.java | 94 ++-- .../webapp/JAXBContextResolver.java | 3 +- .../resourcemanager/webapp/RMWebServices.java | 37 +- .../webapp/dao/ActivitiesInfo.java | 40 +- .../webapp/dao/ActivityNodeInfo.java | 48 +- .../webapp/dao/AppActivitiesInfo.java | 44 +- .../webapp/dao/AppAllocationInfo.java | 24 +- .../webapp/dao/AppRequestAllocationInfo.java | 25 +- .../webapp/dao/NodeAllocationInfo.java | 26 +- .../activities/TestActivitiesManager.java | 70 +-- .../webapp/ActivitiesTestUtils.java | 139 +++++- .../TestRMWebServicesForCSWithPartitions.java | 97 ++++ .../TestRMWebServicesSchedulerActivities.java | 461 +++++++++++++----- ...edulerActivitiesWithMultiNodesEnabled.java | 194 +++++--- 26 files changed, 1322 insertions(+), 614 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityLevel.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java index e698d1a4d0..76fccfb07a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java @@ -30,10 +30,11 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; +import java.util.function.Supplier; + /** * Utility for logging scheduler activities */ -// FIXME: make sure CandidateNodeSet works with this class public class ActivitiesLogger { private static final Logger LOG = LoggerFactory.getLogger(ActivitiesLogger.class); @@ -52,9 +53,9 @@ public static void recordSkippedAppActivityWithoutAllocation( ActivitiesManager activitiesManager, SchedulerNode node, SchedulerApplicationAttempt application, SchedulerRequestKey requestKey, - String diagnostic) { + String diagnostic, ActivityLevel level) { recordAppActivityWithoutAllocation(activitiesManager, node, application, - requestKey, diagnostic, ActivityState.SKIPPED); + requestKey, diagnostic, ActivityState.SKIPPED, level); } /* @@ -72,7 +73,7 @@ public static void recordRejectedAppActivityFromLeafQueue( if (activitiesManager.shouldRecordThisNode(nodeId)) { recordActivity(activitiesManager, nodeId, application.getQueueName(), application.getApplicationId().toString(), priority, - ActivityState.REJECTED, diagnostic, "app"); + ActivityState.REJECTED, diagnostic, ActivityLevel.APP); } finishSkippedAppAllocationRecording(activitiesManager, application.getApplicationId(), ActivityState.REJECTED, diagnostic); @@ -87,50 +88,55 @@ public static void recordAppActivityWithoutAllocation( ActivitiesManager activitiesManager, SchedulerNode node, SchedulerApplicationAttempt application, SchedulerRequestKey schedulerKey, - String diagnostic, ActivityState appState) { + String diagnostic, ActivityState appState, ActivityLevel level) { if (activitiesManager == null) { return; } NodeId nodeId = getRecordingNodeId(activitiesManager, node); if (activitiesManager.shouldRecordThisNode(nodeId)) { - if (schedulerKey != null) { - String allocationRequestId = - String.valueOf(schedulerKey.getAllocationRequestId()); - String priorityStr = getPriorityStr(schedulerKey); - String requestName = getRequestName(priorityStr, allocationRequestId); - String type = "container"; - // Add application-container activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(nodeId, - requestName, null, - priorityStr, appState, diagnostic, type, - null); - type = "request"; - // Add application-container activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(nodeId, - application.getApplicationId().toString(), requestName, - priorityStr, appState, - ActivityDiagnosticConstant.EMPTY, type, allocationRequestId); + String requestName = null; + Integer priority = null; + Long allocationRequestId = null; + if (level == ActivityLevel.NODE || level == ActivityLevel.REQUEST) { + if (schedulerKey == null) { + LOG.warn("Request key should not be null at " + level + " level."); + return; + } + priority = getPriority(schedulerKey); + allocationRequestId = schedulerKey.getAllocationRequestId(); + requestName = getRequestName(priority, allocationRequestId); + } + switch (level) { + case NODE: + recordSchedulerActivityAtNodeLevel(activitiesManager, application, + requestName, priority, allocationRequestId, null, nodeId, + appState, diagnostic); + break; + case REQUEST: + recordSchedulerActivityAtRequestLevel(activitiesManager, application, + requestName, priority, allocationRequestId, nodeId, appState, + diagnostic); + break; + case APP: + recordSchedulerActivityAtAppLevel(activitiesManager, application, + nodeId, appState, diagnostic); + break; + default: + LOG.warn("Doesn't handle app activities at " + level + " level."); + break; } - // Add queue-application activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(nodeId, - application.getQueueName(), - application.getApplicationId().toString(), - application.getPriority().toString(), appState, - schedulerKey != null ? ActivityDiagnosticConstant.EMPTY : - diagnostic, "app", null); } // Add application-container activity into specific application allocation // Under this condition, it fails to allocate a container to this // application, so containerId is null. if (activitiesManager.shouldRecordThisApp( application.getApplicationId())) { - String type = "container"; activitiesManager.addSchedulingActivityForApp( application.getApplicationId(), null, - getPriorityStr(schedulerKey), appState, - diagnostic, type, nodeId, + getPriority(schedulerKey), appState, + diagnostic, level, nodeId, schedulerKey == null ? - null : String.valueOf(schedulerKey.getAllocationRequestId())); + null : schedulerKey.getAllocationRequestId()); } } @@ -150,49 +156,68 @@ public static void recordAppActivityWithAllocation( nodeId = updatedContainer.getNodeId(); } if (activitiesManager.shouldRecordThisNode(nodeId)) { - String containerPriorityStr = - updatedContainer.getContainer().getPriority().toString(); - String allocationRequestId = String - .valueOf(updatedContainer.getContainer().getAllocationRequestId()); + Integer containerPriority = + updatedContainer.getContainer().getPriority().getPriority(); + Long allocationRequestId = + updatedContainer.getContainer().getAllocationRequestId(); String requestName = - getRequestName(containerPriorityStr, allocationRequestId); - String type = "container"; - - // Add application-container activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(nodeId, - requestName, - updatedContainer.getContainer().toString(), - containerPriorityStr, - activityState, ActivityDiagnosticConstant.EMPTY, type, null); - type = "request"; - // Add application-container activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(nodeId, - application.getApplicationId().toString(), - requestName, containerPriorityStr, - activityState, ActivityDiagnosticConstant.EMPTY, type, - allocationRequestId); - type = "app"; - // Add queue-application activity into specific node allocation. - activitiesManager.addSchedulingActivityForNode(nodeId, - application.getQueueName(), - application.getApplicationId().toString(), - application.getPriority().toString(), ActivityState.ACCEPTED, - ActivityDiagnosticConstant.EMPTY, type, null); + getRequestName(containerPriority, allocationRequestId); + // Add node,request,app level activities into scheduler activities. + recordSchedulerActivityAtNodeLevel(activitiesManager, application, + requestName, containerPriority, allocationRequestId, + updatedContainer.getContainer().toString(), nodeId, activityState, + ActivityDiagnosticConstant.EMPTY); } // Add application-container activity into specific application allocation if (activitiesManager.shouldRecordThisApp( application.getApplicationId())) { - String type = "container"; activitiesManager.addSchedulingActivityForApp( application.getApplicationId(), updatedContainer.getContainerId(), - updatedContainer.getContainer().getPriority().toString(), - activityState, ActivityDiagnosticConstant.EMPTY, type, nodeId, - String.valueOf( - updatedContainer.getContainer().getAllocationRequestId())); + updatedContainer.getContainer().getPriority().getPriority(), + activityState, ActivityDiagnosticConstant.EMPTY, + ActivityLevel.NODE, nodeId, + updatedContainer.getContainer().getAllocationRequestId()); } } + @SuppressWarnings("parameternumber") + private static void recordSchedulerActivityAtNodeLevel( + ActivitiesManager activitiesManager, SchedulerApplicationAttempt app, + String requestName, Integer priority, Long allocationRequestId, + String containerId, NodeId nodeId, ActivityState state, + String diagnostic) { + activitiesManager + .addSchedulingActivityForNode(nodeId, requestName, containerId, null, + state, diagnostic, ActivityLevel.NODE, null); + // Record request level activity additionally. + recordSchedulerActivityAtRequestLevel(activitiesManager, app, requestName, + priority, allocationRequestId, nodeId, state, + ActivityDiagnosticConstant.EMPTY); + } + + @SuppressWarnings("parameternumber") + private static void recordSchedulerActivityAtRequestLevel( + ActivitiesManager activitiesManager, SchedulerApplicationAttempt app, + String requestName, Integer priority, Long allocationRequestId, + NodeId nodeId, ActivityState state, String diagnostic) { + activitiesManager.addSchedulingActivityForNode(nodeId, + app.getApplicationId().toString(), requestName, priority, + state, diagnostic, ActivityLevel.REQUEST, + allocationRequestId); + // Record app level activity additionally. + recordSchedulerActivityAtAppLevel(activitiesManager, app, nodeId, state, + ActivityDiagnosticConstant.EMPTY); + } + + private static void recordSchedulerActivityAtAppLevel( + ActivitiesManager activitiesManager, SchedulerApplicationAttempt app, + NodeId nodeId, ActivityState state, String diagnostic) { + activitiesManager.addSchedulingActivityForNode(nodeId, app.getQueueName(), + app.getApplicationId().toString(), app.getPriority().getPriority(), + state, diagnostic, ActivityLevel.APP, null); + } + /* * Invoked when scheduler starts to look at this application within one node * update. @@ -252,13 +277,20 @@ public static class QUEUE { public static void recordQueueActivity(ActivitiesManager activitiesManager, SchedulerNode node, String parentQueueName, String queueName, ActivityState state, String diagnostic) { + recordQueueActivity(activitiesManager, node, parentQueueName, queueName, + state, () -> diagnostic); + } + + public static void recordQueueActivity(ActivitiesManager activitiesManager, + SchedulerNode node, String parentQueueName, String queueName, + ActivityState state, Supplier diagnosticSupplier) { if (activitiesManager == null) { return; } NodeId nodeId = getRecordingNodeId(activitiesManager, node); if (activitiesManager.shouldRecordThisNode(nodeId)) { recordActivity(activitiesManager, nodeId, parentQueueName, queueName, - null, state, diagnostic, null); + null, state, diagnosticSupplier.get(), ActivityLevel.QUEUE); } } } @@ -299,11 +331,11 @@ public static void finishAllocatedNodeAllocation( * Invoked when node heartbeat finishes */ public static void finishNodeUpdateRecording( - ActivitiesManager activitiesManager, NodeId nodeID) { + ActivitiesManager activitiesManager, NodeId nodeID, String partition) { if (activitiesManager == null) { return; } - activitiesManager.finishNodeUpdateRecording(nodeID); + activitiesManager.finishNodeUpdateRecording(nodeID, partition); } /* @@ -320,11 +352,11 @@ public static void startNodeUpdateRecording( // Add queue, application or container activity into specific node allocation. private static void recordActivity(ActivitiesManager activitiesManager, - NodeId nodeId, String parentName, String childName, - Priority priority, ActivityState state, String diagnostic, String type) { + NodeId nodeId, String parentName, String childName, Priority priority, + ActivityState state, String diagnostic, ActivityLevel level) { activitiesManager.addSchedulingActivityForNode(nodeId, parentName, - childName, priority != null ? priority.toString() : null, state, - diagnostic, type, null); + childName, priority != null ? priority.getPriority() : null, state, + diagnostic, level, null); } private static NodeId getRecordingNodeId(ActivitiesManager activitiesManager, @@ -333,16 +365,16 @@ private static NodeId getRecordingNodeId(ActivitiesManager activitiesManager, activitiesManager.getRecordingNodeId(node); } - private static String getRequestName(String priority, - String allocationRequestId) { + private static String getRequestName(Integer priority, + Long allocationRequestId) { return "request_" + (priority == null ? "" : priority) + "_" + (allocationRequestId == null ? "" : allocationRequestId); } - private static String getPriorityStr(SchedulerRequestKey schedulerKey) { + private static Integer getPriority(SchedulerRequestKey schedulerKey) { Priority priority = schedulerKey == null ? null : schedulerKey.getPriority(); - return priority == null ? null : priority.toString(); + return priority == null ? null : priority.getPriority(); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java index 05ac01b378..77ed2182f1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java @@ -124,7 +124,7 @@ private void setupConfForCleanup(Configuration conf) { } public AppActivitiesInfo getAppActivitiesInfo(ApplicationId applicationId, - Set requestPriorities, Set allocationRequestIds, + Set requestPriorities, Set allocationRequestIds, RMWSConsts.ActivitiesGroupBy groupBy, int limit, boolean summarize, double maxTimeInSeconds) { RMApp app = rmContext.getRMApps().get(applicationId); @@ -186,20 +186,18 @@ private AppAllocation getSummarizedAppAllocation( } List activityNodes = appAllocation.getAllocationAttempts(); for (ActivityNode an : activityNodes) { - if (an.getNodeId() != null) { - nodeActivities.putIfAbsent( - an.getRequestPriority() + "_" + an.getAllocationRequestId() + "_" - + an.getNodeId(), an); - } + nodeActivities.putIfAbsent( + an.getRequestPriority() + "_" + an.getAllocationRequestId() + "_" + + an.getNodeId(), an); } } AppAllocation lastAppAllocation = allocations.get(allocations.size() - 1); AppAllocation summarizedAppAllocation = new AppAllocation(lastAppAllocation.getPriority(), null, lastAppAllocation.getQueueName()); - summarizedAppAllocation - .updateAppContainerStateAndTime(null, lastAppAllocation.getAppState(), - lastAppAllocation.getTime(), lastAppAllocation.getDiagnostic()); + summarizedAppAllocation.updateAppContainerStateAndTime(null, + lastAppAllocation.getActivityState(), lastAppAllocation.getTime(), + lastAppAllocation.getDiagnostic()); summarizedAppAllocation .setAllocationAttempts(new ArrayList<>(nodeActivities.values())); return summarizedAppAllocation; @@ -282,7 +280,7 @@ public void run() { Map.Entry> nodeAllocation = ite.next(); List allocations = nodeAllocation.getValue(); if (allocations.size() > 0 - && curTS - allocations.get(0).getTimeStamp() + && curTS - allocations.get(0).getTimestamp() > schedulerActivitiesTTL) { ite.remove(); } @@ -383,26 +381,26 @@ void startAppAllocationRecording(NodeId nodeID, long currTS, // Add queue, application or container activity into specific node allocation. void addSchedulingActivityForNode(NodeId nodeId, String parentName, - String childName, String priority, ActivityState state, String diagnostic, - String type, String allocationRequestId) { + String childName, Integer priority, ActivityState state, + String diagnostic, ActivityLevel level, Long allocationRequestId) { if (shouldRecordThisNode(nodeId)) { NodeAllocation nodeAllocation = getCurrentNodeAllocation(nodeId); nodeAllocation.addAllocationActivity(parentName, childName, priority, - state, diagnostic, type, nodeId, allocationRequestId); + state, diagnostic, level, nodeId, allocationRequestId); } } // Add queue, application or container activity into specific application // allocation. void addSchedulingActivityForApp(ApplicationId applicationId, - ContainerId containerId, String priority, ActivityState state, - String diagnostic, String type, NodeId nodeId, - String allocationRequestId) { + ContainerId containerId, Integer priority, ActivityState state, + String diagnostic, ActivityLevel level, NodeId nodeId, + Long allocationRequestId) { if (shouldRecordThisApp(applicationId)) { AppAllocation appAllocation = appsAllocation.get().get(applicationId); appAllocation.addAppAllocationActivity(containerId == null ? "Container-Id-Not-Assigned" : - containerId.toString(), priority, state, diagnostic, type, nodeId, + containerId.toString(), priority, state, diagnostic, level, nodeId, allocationRequestId); } } @@ -450,16 +448,17 @@ void finishAppAllocationRecording(ApplicationId applicationId, } } - void finishNodeUpdateRecording(NodeId nodeID) { + void finishNodeUpdateRecording(NodeId nodeID, String partition) { List value = recordingNodesAllocation.get().get(nodeID); - long timeStamp = SystemClock.getInstance().getTime(); + long timestamp = SystemClock.getInstance().getTime(); if (value != null) { if (value.size() > 0) { lastAvailableNodeActivities = value; for (NodeAllocation allocation : lastAvailableNodeActivities) { allocation.transformToTree(); - allocation.setTimeStamp(timeStamp); + allocation.setTimestamp(timestamp); + allocation.setPartition(partition); } if (recordNextAvailableNode) { recordNextAvailableNode = false; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesUtils.java index 4cdaac8e91..e7e4ad480f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesUtils.java @@ -40,11 +40,11 @@ public static List getRequestActivityNodeInfos( } if (groupBy == RMWSConsts.ActivitiesGroupBy.DIAGNOSTIC) { Map>> groupingResults = - activityNodes.stream().collect(Collectors - .groupingBy(ActivityNode::getState, Collectors + activityNodes.stream() + .filter(e -> e.getNodeId() != null) + .collect(Collectors.groupingBy(ActivityNode::getState, Collectors .groupingBy(ActivityNode::getShortDiagnostic, - Collectors.mapping(e -> e.getNodeId() == null ? - "" : + Collectors.mapping(e -> e.getNodeId() == null ? "" : e.getNodeId().toString(), Collectors.toList())))); return groupingResults.entrySet().stream().flatMap( stateMap -> stateMap.getValue().entrySet().stream().map( @@ -53,8 +53,8 @@ public static List getRequestActivityNodeInfos( diagMap.getValue()))) .collect(Collectors.toList()); } else { - return activityNodes.stream().map( - e -> new ActivityNodeInfo(e.getName(), e.getState(), + return activityNodes.stream().filter(e -> e.getNodeId() != null) + .map(e -> new ActivityNodeInfo(e.getName(), e.getState(), e.getDiagnostic(), e.getNodeId())).collect(Collectors.toList()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityDiagnosticConstant.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityDiagnosticConstant.java index d3d4d9b4d3..ecaa88438a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityDiagnosticConstant.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityDiagnosticConstant.java @@ -26,51 +26,81 @@ public class ActivityDiagnosticConstant { // In order not to show "diagnostic" line in frontend, // we set the value to null. public final static String EMPTY = null; - public final static String NOT_ABLE_TO_ACCESS_PARTITION = - "Not able to access partition"; + + /* + * Initial check diagnostics + */ + public final static String INIT_CHECK_SINGLE_NODE_REMOVED = + "Initial check: node has been removed from scheduler"; + public final static String INIT_CHECK_SINGLE_NODE_RESOURCE_INSUFFICIENT = + "Initial check: node resource is insufficient for minimum allocation"; + public final static String INIT_CHECK_PARTITION_RESOURCE_INSUFFICIENT = + "Initial check: insufficient resource in partition"; + + /* + * Queue level diagnostics + */ + public final static String QUEUE_NOT_ABLE_TO_ACCESS_PARTITION = + "Queue is not able to access partition"; + public final static String QUEUE_HIT_MAX_CAPACITY_LIMIT = + "Queue hits max-capacity limit"; + public final static String QUEUE_HIT_USER_MAX_CAPACITY_LIMIT = + "Queue hits user max-capacity limit"; + public final static String QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM = + "Queue does not have enough headroom for inner highest-priority request"; + public final static String QUEUE_DO_NOT_NEED_MORE_RESOURCE = "Queue does not need more resource"; - public final static String QUEUE_MAX_CAPACITY_LIMIT = - "Hit queue max-capacity limit"; - public final static String USER_CAPACITY_MAXIMUM_LIMIT = - "Hit user capacity maximum limit"; - public final static String SKIP_BLACK_LISTED_NODE = "Skip black listed node"; - public final static String PRIORITY_SKIPPED = "Priority skipped"; - public final static String PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST = - "Priority skipped because off-switch request is null"; - public final static String SKIP_PRIORITY_BECAUSE_OF_RELAX_LOCALITY = - "Priority skipped because of relax locality is not allowed"; - public final static String SKIP_IN_IGNORE_EXCLUSIVITY_MODE = - "Skipping assigning to Node in Ignore Exclusivity mode"; - public final static String DO_NOT_NEED_ALLOCATIONATTEMPTINFOS = - "Doesn't need containers based on reservation algo!"; - public final static String QUEUE_SKIPPED_HEADROOM = - "Queue skipped because of headroom"; - public final static String NON_PARTITIONED_PARTITION_FIRST = - "Non-partitioned resource request should be scheduled to " - + "non-partitioned partition first"; - public final static String SKIP_NODE_LOCAL_REQUEST = - "Skip node-local request"; - public final static String SKIP_RACK_LOCAL_REQUEST = - "Skip rack-local request"; - public final static String SKIP_OFF_SWITCH_REQUEST = - "Skip offswitch request"; - public final static String REQUEST_CAN_NOT_ACCESS_NODE_LABEL = - "Resource request can not access the label"; - public final static String NOT_SUFFICIENT_RESOURCE = - "Node does not have sufficient resource for request"; - public final static String LOCALITY_SKIPPED = "Locality skipped"; - public final static String FAIL_TO_ALLOCATE = "Fail to allocate"; - public final static String COULD_NOT_GET_CONTAINER = - "Couldn't get container for allocation"; + public final static String QUEUE_SKIPPED_TO_RESPECT_FIFO = "Queue skipped " + + "to respect FIFO of applications"; + public final static String QUEUE_SKIPPED_BECAUSE_SINGLE_NODE_RESERVED = + "Queue skipped because node has been reserved"; + public final static String + QUEUE_SKIPPED_BECAUSE_SINGLE_NODE_RESOURCE_INSUFFICIENT = + "Queue skipped because node resource is insufficient"; + + /* + * Application level diagnostics + */ + public final static String APPLICATION_FAIL_TO_ALLOCATE = + "Application fails to allocate"; + public final static String APPLICATION_COULD_NOT_GET_CONTAINER = + "Application couldn't get container for allocation"; + public final static String APPLICATION_DO_NOT_NEED_RESOURCE = "Application does not need more resource"; - public final static String APPLICATION_PRIORITY_DO_NOT_NEED_RESOURCE = - "Application priority does not need more resource"; - public final static String SKIPPED_ALL_PRIORITIES = - "All priorities are skipped of the app"; - public final static String RESPECT_FIFO = "To respect FIFO of applications, " - + "skipped following applications in the queue"; + + /* + * Request level diagnostics + */ + public final static String REQUEST_SKIPPED_BECAUSE_NULL_ANY_REQUEST = + "Request skipped because off-switch request is null"; + public final static String REQUEST_SKIPPED_IN_IGNORE_EXCLUSIVITY_MODE = + "Request skipped in Ignore Exclusivity mode for AM allocation"; + public final static String REQUEST_SKIPPED_BECAUSE_OF_RESERVATION = + "Request skipped based on reservation algo"; + public final static String + REQUEST_SKIPPED_BECAUSE_NON_PARTITIONED_PARTITION_FIRST = + "Request skipped because non-partitioned resource request should be " + + "scheduled to non-partitioned partition first"; + public final static String REQUEST_DO_NOT_NEED_RESOURCE = + "Request does not need more resource"; + + /* + * Node level diagnostics + */ + public final static String + NODE_SKIPPED_BECAUSE_OF_NO_OFF_SWITCH_AND_LOCALITY_VIOLATION = + "Node skipped because node/rack locality cannot be satisfied"; + public final static String NODE_SKIPPED_BECAUSE_OF_OFF_SWITCH_DELAY = + "Node skipped because of off-switch delay"; + public final static String NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY = + "Node skipped because relax locality is not allowed"; + public final static String NODE_TOTAL_RESOURCE_INSUFFICIENT_FOR_REQUEST = + "Node's total resource is insufficient for request"; + public final static String NODE_DO_NOT_HAVE_SUFFICIENT_RESOURCE = + "Node does not have sufficient resource for request"; + public final static String NODE_IS_BLACKLISTED = "Node is blacklisted"; public final static String NODE_DO_NOT_MATCH_PARTITION_OR_PLACEMENT_CONSTRAINTS = "Node does not match partition or placement constraints"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityLevel.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityLevel.java new file mode 100644 index 0000000000..ab08d5f078 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityLevel.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities; + +/** + * Collection of activity operation levels. + */ +public enum ActivityLevel { + QUEUE, + APP, + REQUEST, + NODE +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityNode.java index e658d2fbef..2725ebb325 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivityNode.java @@ -32,36 +32,36 @@ public class ActivityNode { private String activityNodeName; private String parentName; - private String appPriority; - private String requestPriority; + private Integer appPriority; + private Integer requestPriority; private ActivityState state; private String diagnostic; private NodeId nodeId; - private String allocationRequestId; + private Long allocationRequestId; private List childNode; public ActivityNode(String activityNodeName, String parentName, - String priority, ActivityState state, String diagnostic, String type) { - this(activityNodeName, parentName, priority, state, diagnostic, type, null, - null); - } - - public ActivityNode(String activityNodeName, String parentName, - String priority, ActivityState state, String diagnostic, String type, - NodeId nodeId, String allocationRequestId) { + Integer priority, ActivityState state, String diagnostic, + ActivityLevel level, NodeId nodeId, Long allocationRequestId) { this.activityNodeName = activityNodeName; this.parentName = parentName; - if (type != null) { - if (type.equals("app")) { + if (level != null) { + switch (level) { + case APP: this.appPriority = priority; - } else if (type.equals("request")) { + break; + case REQUEST: this.requestPriority = priority; this.allocationRequestId = allocationRequestId; - } else if (type.equals("container")) { + break; + case NODE: this.requestPriority = priority; this.allocationRequestId = allocationRequestId; this.nodeId = nodeId; + break; + default: + break; } } this.state = state; @@ -93,11 +93,11 @@ public String getDiagnostic() { return this.diagnostic; } - public String getAppPriority() { + public Integer getAppPriority() { return appPriority; } - public String getRequestPriority() { + public Integer getRequestPriority() { return requestPriority; } @@ -105,7 +105,7 @@ public NodeId getNodeId() { return nodeId; } - public String getAllocationRequestId() { + public Long getAllocationRequestId() { return allocationRequestId; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AllocationActivity.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AllocationActivity.java index a71ec6f561..b4231d7082 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AllocationActivity.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AllocationActivity.java @@ -30,29 +30,37 @@ public class AllocationActivity { private String childName = null; private String parentName = null; - private String appPriority = null; - private String requestPriority = null; + private Integer appPriority = null; + private Integer requestPriority = null; private ActivityState state; private String diagnostic = null; private NodeId nodeId; - private String allocationRequestId; + private Long allocationRequestId; + private ActivityLevel level; private static final Logger LOG = LoggerFactory.getLogger(AllocationActivity.class); public AllocationActivity(String parentName, String queueName, - String priority, ActivityState state, String diagnostic, String type, - NodeId nodeId, String allocationRequestId) { + Integer priority, ActivityState state, String diagnostic, + ActivityLevel level, NodeId nodeId, Long allocationRequestId) { this.childName = queueName; this.parentName = parentName; - if (type != null) { - if (type.equals("app")) { + if (level != null) { + this.level = level; + switch (level) { + case APP: this.appPriority = priority; - } else if (type.equals("request")) { + break; + case REQUEST: this.requestPriority = priority; this.allocationRequestId = allocationRequestId; - } else if (type.equals("container")) { + break; + case NODE: this.nodeId = nodeId; + break; + default: + break; } } this.state = state; @@ -60,21 +68,11 @@ public AllocationActivity(String parentName, String queueName, } public ActivityNode createTreeNode() { - if (appPriority != null) { - return new ActivityNode(this.childName, this.parentName, this.appPriority, - this.state, this.diagnostic, "app"); - } else if (requestPriority != null) { - return new ActivityNode(this.childName, this.parentName, - this.requestPriority, this.state, this.diagnostic, "request", null, - allocationRequestId); - } else if (nodeId != null) { - return new ActivityNode(this.childName, this.parentName, - this.requestPriority, this.state, this.diagnostic, "container", - this.nodeId, null); - } else { - return new ActivityNode(this.childName, this.parentName, null, this.state, - this.diagnostic, null); - } + return new ActivityNode(this.childName, this.parentName, + this.level == ActivityLevel.APP ? + this.appPriority : this.requestPriority, + this.state, this.diagnostic, this.level, + this.nodeId, this.allocationRequestId); } public String getName() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AppAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AppAllocation.java index e226b50fb7..49d0bc0e00 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AppAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/AppAllocation.java @@ -29,18 +29,18 @@ import java.util.function.Predicate; import java.util.stream.Collectors; -/* +/** * It contains allocation information for one application within a period of * time. * Each application allocation may have several allocation attempts. */ public class AppAllocation { - private Priority priority = null; + private Priority priority; private NodeId nodeId; - private ContainerId containerId = null; - private ActivityState appState = null; - private String diagnostic = null; - private String queueName = null; + private ContainerId containerId; + private ActivityState activityState; + private String diagnostic; + private String queueName; private List allocationAttempts; private long timestamp; @@ -51,24 +51,24 @@ public AppAllocation(Priority priority, NodeId nodeId, String queueName) { this.queueName = queueName; } - public void updateAppContainerStateAndTime(ContainerId containerId, + public void updateAppContainerStateAndTime(ContainerId cId, ActivityState appState, long ts, String diagnostic) { this.timestamp = ts; - this.containerId = containerId; - this.appState = appState; + this.containerId = cId; + this.activityState = appState; this.diagnostic = diagnostic; } - public void addAppAllocationActivity(String containerId, String priority, - ActivityState state, String diagnose, String type, NodeId nId, - String allocationRequestId) { - ActivityNode container = new ActivityNode(containerId, null, priority, - state, diagnose, type, nId, allocationRequestId); + public void addAppAllocationActivity(String cId, Integer reqPriority, + ActivityState state, String diagnose, ActivityLevel level, NodeId nId, + Long allocationRequestId) { + ActivityNode container = new ActivityNode(cId, null, reqPriority, + state, diagnose, level, nId, allocationRequestId); this.allocationAttempts.add(container); if (state == ActivityState.REJECTED) { - this.appState = ActivityState.SKIPPED; + this.activityState = ActivityState.SKIPPED; } else { - this.appState = state; + this.activityState = state; } } @@ -80,8 +80,8 @@ public String getQueueName() { return queueName; } - public ActivityState getAppState() { - return appState; + public ActivityState getActivityState() { + return activityState; } public Priority getPriority() { @@ -107,11 +107,11 @@ public List getAllocationAttempts() { return allocationAttempts; } - public AppAllocation filterAllocationAttempts(Set requestPriorities, - Set allocationRequestIds) { + public AppAllocation filterAllocationAttempts(Set requestPriorities, + Set allocationRequestIds) { AppAllocation appAllocation = new AppAllocation(this.priority, this.nodeId, this.queueName); - appAllocation.appState = this.appState; + appAllocation.activityState = this.activityState; appAllocation.containerId = this.containerId; appAllocation.timestamp = this.timestamp; appAllocation.diagnostic = this.diagnostic; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/NodeAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/NodeAllocation.java index df1d00c77e..99e006219d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/NodeAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/NodeAllocation.java @@ -39,10 +39,11 @@ */ public class NodeAllocation { private NodeId nodeId; - private long timeStamp; + private long timestamp; private ContainerId containerId = null; private AllocationState containerState = AllocationState.DEFAULT; private List allocationOperations; + private String partition; private ActivityNode root = null; @@ -55,10 +56,10 @@ public NodeAllocation(NodeId nodeId) { } public void addAllocationActivity(String parentName, String childName, - String priority, ActivityState state, String diagnostic, String type, - NodeId nId, String allocationRequestId) { + Integer priority, ActivityState state, String diagnostic, + ActivityLevel level, NodeId nId, Long allocationRequestId) { AllocationActivity allocate = new AllocationActivity(parentName, childName, - priority, state, diagnostic, type, nId, allocationRequestId); + priority, state, diagnostic, level, nId, allocationRequestId); this.allocationOperations.add(allocate); } @@ -113,12 +114,12 @@ public void transformToTree() { } } - public void setTimeStamp(long timeStamp) { - this.timeStamp = timeStamp; + public void setTimestamp(long timestamp) { + this.timestamp = timestamp; } - public long getTimeStamp() { - return this.timeStamp; + public long getTimestamp() { + return this.timestamp; } public AllocationState getFinalAllocationState() { @@ -138,4 +139,12 @@ public ActivityNode getRoot() { public NodeId getNodeId() { return nodeId; } + + public String getPartition() { + return partition; + } + + public void setPartition(String partition) { + this.partition = partition; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 21cdbf9113..e0695e9c3d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -1518,6 +1518,11 @@ private CSAssignment allocateContainerOnSingleNode( if (getNode(node.getNodeID()) != node) { LOG.error("Trying to schedule on a removed node, please double check, " + "nodeId=" + node.getNodeID()); + ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, + "", getRootQueue().getQueueName(), ActivityState.REJECTED, + ActivityDiagnosticConstant.INIT_CHECK_SINGLE_NODE_REMOVED); + ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, + node); return null; } @@ -1527,12 +1532,9 @@ private CSAssignment allocateContainerOnSingleNode( RMContainer reservedContainer = node.getReservedContainer(); if (reservedContainer != null) { allocateFromReservedContainer(node, withNodeHeartbeat, reservedContainer); - } - - // Do not schedule if there are any reservations to fulfill on the node - if (node.getReservedContainer() != null) { + // Do not schedule if there are any reservations to fulfill on the node LOG.debug("Skipping scheduling since node {} is reserved by" - + " application {}", node.getNodeID(), node.getReservedContainer(). + + " application {}", node.getNodeID(), reservedContainer. getContainerId().getApplicationAttemptId()); return null; } @@ -1543,8 +1545,14 @@ private CSAssignment allocateContainerOnSingleNode( if (calculator.computeAvailableContainers(Resources .add(node.getUnallocatedResource(), node.getTotalKillableResources()), minimumAllocation) <= 0) { - LOG.debug("This node or node partition doesn't have available or" + - " preemptible resource"); + LOG.debug("This node " + node.getNodeID() + " doesn't have sufficient " + + "available or preemptible resource for minimum allocation"); + ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, + "", getRootQueue().getQueueName(), ActivityState.REJECTED, + ActivityDiagnosticConstant. + INIT_CHECK_SINGLE_NODE_RESOURCE_INSUFFICIENT); + ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, + node); return null; } @@ -1594,12 +1602,12 @@ private void allocateFromReservedContainer(FiCaSchedulerNode node, ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager, node, reservedContainer.getContainerId(), AllocationState.ALLOCATED_FROM_RESERVED); - } else{ + } else if (assignment.getAssignmentInformation().getNumReservations() > 0) { ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, queue.getParent().getQueueName(), queue.getQueueName(), - ActivityState.SKIPPED, ActivityDiagnosticConstant.EMPTY); + ActivityState.RE_RESERVED, ActivityDiagnosticConstant.EMPTY); ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager, - node, reservedContainer.getContainerId(), AllocationState.SKIPPED); + node, reservedContainer.getContainerId(), AllocationState.RESERVED); } assignment.setSchedulingMode( @@ -1685,12 +1693,14 @@ private CSAssignment allocateContainersOnMultiNodes( allocateFromReservedContainer(node, false, reservedContainer); } } - LOG.debug("This node or this node partition doesn't have available or " - + "killable resource"); + LOG.debug("This partition '{}' doesn't have available or " + + "killable resource", candidates.getPartition()); ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, null, "", getRootQueue().getQueueName(), ActivityState.REJECTED, - ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION + " " - + candidates.getPartition()); + ActivityDiagnosticConstant. + INIT_CHECK_PARTITION_RESOURCE_INSUFFICIENT); + ActivitiesLogger.NODE + .finishSkippedNodeAllocation(activitiesManager, null); return null; } @@ -1721,13 +1731,13 @@ CSAssignment allocateContainersToNode( assignment = allocateContainerOnSingleNode(candidates, node, withNodeHeartbeat); ActivitiesLogger.NODE.finishNodeUpdateRecording(activitiesManager, - node.getNodeID()); + node.getNodeID(), candidates.getPartition()); } else{ ActivitiesLogger.NODE.startNodeUpdateRecording(activitiesManager, ActivitiesManager.EMPTY_NODE_ID); assignment = allocateContainersOnMultiNodes(candidates); ActivitiesLogger.NODE.finishNodeUpdateRecording(activitiesManager, - ActivitiesManager.EMPTY_NODE_ID); + ActivitiesManager.EMPTY_NODE_ID, candidates.getPartition()); } if (assignment != null && assignment.getAssignmentInformation() != null diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 4d2b8e7c30..934fc1b728 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -1074,8 +1074,7 @@ public CSAssignment assignContainers(Resource clusterResource, && !accessibleToPartition(candidates.getPartition())) { ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, getParent().getQueueName(), getQueueName(), ActivityState.REJECTED, - ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION + " " - + candidates.getPartition()); + ActivityDiagnosticConstant.QUEUE_NOT_ABLE_TO_ACCESS_PARTITION); return CSAssignment.NULL_ASSIGNMENT; } @@ -1113,10 +1112,11 @@ public CSAssignment assignContainers(Resource clusterResource, schedulingMode)) { ActivitiesLogger.APP.recordRejectedAppActivityFromLeafQueue( activitiesManager, node, application, application.getPriority(), - ActivityDiagnosticConstant.QUEUE_MAX_CAPACITY_LIMIT); + ActivityDiagnosticConstant.QUEUE_HIT_MAX_CAPACITY_LIMIT); ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, - getParent().getQueueName(), getQueueName(), ActivityState.SKIPPED, - ActivityDiagnosticConstant.EMPTY); + getParent().getQueueName(), getQueueName(), + ActivityState.REJECTED, + ActivityDiagnosticConstant.QUEUE_HIT_MAX_CAPACITY_LIMIT); return CSAssignment.NULL_ASSIGNMENT; } // If there was no reservation and canAssignToThisQueue returned @@ -1157,7 +1157,7 @@ public CSAssignment assignContainers(Resource clusterResource, "User capacity has reached its maximum limit."); ActivitiesLogger.APP.recordRejectedAppActivityFromLeafQueue( activitiesManager, node, application, application.getPriority(), - ActivityDiagnosticConstant.USER_CAPACITY_MAXIMUM_LIMIT); + ActivityDiagnosticConstant.QUEUE_HIT_USER_MAX_CAPACITY_LIMIT); continue; } @@ -1189,15 +1189,16 @@ public CSAssignment assignContainers(Resource clusterResource, } else if (assignment.getSkippedType() == CSAssignment.SkippedType.QUEUE_LIMIT) { ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, - getParent().getQueueName(), getQueueName(), ActivityState.SKIPPED, - ActivityDiagnosticConstant.QUEUE_SKIPPED_HEADROOM); + getParent().getQueueName(), getQueueName(), ActivityState.REJECTED, + () -> ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM + + " from " + application.getApplicationId()); return assignment; } else{ // If we don't allocate anything, and it is not skipped by application, // we will return to respect FIFO of applications ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, getParent().getQueueName(), getQueueName(), ActivityState.SKIPPED, - ActivityDiagnosticConstant.RESPECT_FIFO); + ActivityDiagnosticConstant.QUEUE_SKIPPED_TO_RESPECT_FIFO); ActivitiesLogger.APP.finishSkippedAppAllocationRecording( activitiesManager, application.getApplicationId(), ActivityState.SKIPPED, ActivityDiagnosticConstant.EMPTY); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java index fb747fb4d9..8d3244722c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java @@ -559,8 +559,7 @@ public CSAssignment assignContainers(Resource clusterResource, ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, getParentName(), getQueueName(), ActivityState.REJECTED, - ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION - + candidates.getPartition()); + ActivityDiagnosticConstant.QUEUE_NOT_ABLE_TO_ACCESS_PARTITION); if (rootQueue) { ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, node); @@ -613,8 +612,8 @@ public CSAssignment assignContainers(Resource clusterResource, getMetrics().getReservedVirtualCores()), schedulingMode)) { ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, - getParentName(), getQueueName(), ActivityState.SKIPPED, - ActivityDiagnosticConstant.QUEUE_MAX_CAPACITY_LIMIT); + getParentName(), getQueueName(), ActivityState.REJECTED, + ActivityDiagnosticConstant.QUEUE_HIT_MAX_CAPACITY_LIMIT); if (rootQueue) { ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, node); @@ -648,22 +647,13 @@ public CSAssignment assignContainers(Resource clusterResource, assignedToChild.getAssignmentInformation().getReservationDetails() != null && !assignedToChild.getAssignmentInformation() .getReservationDetails().isEmpty(); - if (node != null && !isReserved) { - if (rootQueue) { - ActivitiesLogger.NODE.finishAllocatedNodeAllocation( - activitiesManager, node, - assignedToChild.getAssignmentInformation() - .getFirstAllocatedOrReservedContainerId(), - AllocationState.ALLOCATED); - } - } else{ - if (rootQueue) { - ActivitiesLogger.NODE.finishAllocatedNodeAllocation( - activitiesManager, node, - assignedToChild.getAssignmentInformation() - .getFirstAllocatedOrReservedContainerId(), - AllocationState.RESERVED); - } + if (rootQueue) { + ActivitiesLogger.NODE.finishAllocatedNodeAllocation( + activitiesManager, node, + assignedToChild.getAssignmentInformation() + .getFirstAllocatedOrReservedContainerId(), + isReserved ? + AllocationState.RESERVED : AllocationState.ALLOCATED); } // Track resource utilization in this pass of the scheduler @@ -735,10 +725,24 @@ private boolean canAssign(Resource clusterResource, FiCaSchedulerNode node) { // Two conditions need to meet when trying to allocate: // 1) Node doesn't have reserved container // 2) Node's available-resource + killable-resource should > 0 - return node.getReservedContainer() == null && Resources.greaterThanOrEqual( - resourceCalculator, clusterResource, Resources + boolean accept = node.getReservedContainer() == null && Resources + .greaterThanOrEqual(resourceCalculator, clusterResource, Resources .add(node.getUnallocatedResource(), node.getTotalKillableResources()), minimumAllocation); + if (!accept) { + ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, + getParentName(), getQueueName(), ActivityState.REJECTED, + () -> node.getReservedContainer() != null ? + ActivityDiagnosticConstant. + QUEUE_SKIPPED_BECAUSE_SINGLE_NODE_RESERVED : + ActivityDiagnosticConstant. + QUEUE_SKIPPED_BECAUSE_SINGLE_NODE_RESOURCE_INSUFFICIENT); + if (rootQueue) { + ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager, + node); + } + } + return accept; } private ResourceLimits getResourceLimitsOfChild(CSQueue child, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java index 2643fd0b7a..1dacc96242 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java @@ -24,6 +24,7 @@ import java.util.Optional; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityLevel; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.DiagnosticsCollector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -107,7 +108,8 @@ private ContainerAllocation preCheckForNodeCandidateSet( if (offswitchPendingAsk.getCount() <= 0) { ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST); + ActivityDiagnosticConstant.REQUEST_DO_NOT_NEED_RESOURCE, + ActivityLevel.REQUEST); return ContainerAllocation.PRIORITY_SKIPPED; } @@ -118,7 +120,8 @@ private ContainerAllocation preCheckForNodeCandidateSet( if (application.getOutstandingAsksCount(schedulerKey) <= 0) { ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.APPLICATION_PRIORITY_DO_NOT_NEED_RESOURCE); + ActivityDiagnosticConstant.REQUEST_DO_NOT_NEED_RESOURCE, + ActivityLevel.REQUEST); return ContainerAllocation.PRIORITY_SKIPPED; } @@ -133,7 +136,9 @@ private ContainerAllocation preCheckForNodeCandidateSet( "Skipping assigning to Node in Ignore Exclusivity mode. "); ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.SKIP_IN_IGNORE_EXCLUSIVITY_MODE); + ActivityDiagnosticConstant. + REQUEST_SKIPPED_IN_IGNORE_EXCLUSIVITY_MODE, + ActivityLevel.REQUEST); return ContainerAllocation.APP_SKIPPED; } } @@ -148,7 +153,8 @@ private ContainerAllocation preCheckForNodeCandidateSet( activitiesManager, node, application, schedulerKey, ActivityDiagnosticConstant. NODE_DO_NOT_MATCH_PARTITION_OR_PLACEMENT_CONSTRAINTS - + ActivitiesManager.getDiagnostics(dcOpt)); + + ActivitiesManager.getDiagnostics(dcOpt), + ActivityLevel.NODE); return ContainerAllocation.PRIORITY_SKIPPED; } @@ -157,7 +163,8 @@ private ContainerAllocation preCheckForNodeCandidateSet( LOG.debug("doesn't need containers based on reservation algo!"); ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.DO_NOT_NEED_ALLOCATIONATTEMPTINFOS); + ActivityDiagnosticConstant.REQUEST_SKIPPED_BECAUSE_OF_RESERVATION, + ActivityLevel.REQUEST); return ContainerAllocation.PRIORITY_SKIPPED; } } @@ -166,9 +173,11 @@ private ContainerAllocation preCheckForNodeCandidateSet( node.getPartition())) { LOG.debug("cannot allocate required resource={} because of headroom", required); - ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( + ActivitiesLogger.APP.recordAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.QUEUE_SKIPPED_HEADROOM); + ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM, + ActivityState.REJECTED, + ActivityLevel.REQUEST); return ContainerAllocation.QUEUE_SKIPPED; } @@ -183,7 +192,8 @@ private ContainerAllocation preCheckForNodeCandidateSet( // thread. ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST); + ActivityDiagnosticConstant.REQUEST_SKIPPED_BECAUSE_NULL_ANY_REQUEST, + ActivityLevel.REQUEST); return ContainerAllocation.PRIORITY_SKIPPED; } String requestPartition = @@ -213,7 +223,9 @@ private ContainerAllocation preCheckForNodeCandidateSet( } ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.NON_PARTITIONED_PARTITION_FIRST); + ActivityDiagnosticConstant. + REQUEST_SKIPPED_BECAUSE_NON_PARTITIONED_PARTITION_FIRST, + ActivityLevel.REQUEST); return ContainerAllocation.APP_SKIPPED; } } @@ -228,7 +240,8 @@ private ContainerAllocation checkIfNodeBlackListed(FiCaSchedulerNode node, CSAMContainerLaunchDiagnosticsConstants.SKIP_AM_ALLOCATION_IN_BLACK_LISTED_NODE); ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.SKIP_BLACK_LISTED_NODE); + ActivityDiagnosticConstant.NODE_IS_BLACKLISTED, + ActivityLevel.NODE); return ContainerAllocation.APP_SKIPPED; } @@ -366,9 +379,6 @@ private ContainerAllocation assignNodeLocalContainers( } // Skip node-local request, go to rack-local request - ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( - activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.SKIP_NODE_LOCAL_REQUEST); return ContainerAllocation.LOCALITY_SKIPPED; } @@ -384,9 +394,6 @@ private ContainerAllocation assignRackLocalContainers( } // Skip rack-local request, go to off-switch request - ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( - activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.SKIP_RACK_LOCAL_REQUEST); return ContainerAllocation.LOCALITY_SKIPPED; } @@ -405,7 +412,8 @@ private ContainerAllocation assignOffSwitchContainers( CSAMContainerLaunchDiagnosticsConstants.SKIP_AM_ALLOCATION_DUE_TO_LOCALITY); ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.SKIP_OFF_SWITCH_REQUEST); + ActivityDiagnosticConstant.NODE_SKIPPED_BECAUSE_OF_OFF_SWITCH_DELAY, + ActivityLevel.NODE); return ContainerAllocation.APP_SKIPPED; } @@ -439,7 +447,8 @@ private ContainerAllocation assignContainersOnNode(Resource clusterResource, if (!appInfo.canDelayTo(schedulerKey, node.getRackName())) { ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.SKIP_PRIORITY_BECAUSE_OF_RELAX_LOCALITY); + ActivityDiagnosticConstant.NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY, + ActivityLevel.NODE); return ContainerAllocation.PRIORITY_SKIPPED; } @@ -465,7 +474,8 @@ private ContainerAllocation assignContainersOnNode(Resource clusterResource, if (!appInfo.canDelayTo(schedulerKey, ResourceRequest.ANY)) { ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.SKIP_PRIORITY_BECAUSE_OF_RELAX_LOCALITY); + ActivityDiagnosticConstant.NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY, + ActivityLevel.NODE); return ContainerAllocation.PRIORITY_SKIPPED; } @@ -489,7 +499,9 @@ private ContainerAllocation assignContainersOnNode(Resource clusterResource, } ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.PRIORITY_SKIPPED); + ActivityDiagnosticConstant. + NODE_SKIPPED_BECAUSE_OF_NO_OFF_SWITCH_AND_LOCALITY_VIOLATION, + ActivityLevel.NODE); return ContainerAllocation.PRIORITY_SKIPPED; } @@ -516,8 +528,10 @@ private ContainerAllocation assignContainer(Resource clusterResource, // Skip this locality request ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE - + getResourceDiagnostics(capability, totalResource)); + ActivityDiagnosticConstant. + NODE_TOTAL_RESOURCE_INSUFFICIENT_FOR_REQUEST + + getResourceDiagnostics(capability, totalResource), + ActivityLevel.NODE); return ContainerAllocation.LOCALITY_SKIPPED; } @@ -597,7 +611,8 @@ private ContainerAllocation assignContainer(Resource clusterResource, ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, ActivityDiagnosticConstant. - NODE_CAN_NOT_FIND_CONTAINER_TO_BE_UNRESERVED_WHEN_NEEDED); + NODE_CAN_NOT_FIND_CONTAINER_TO_BE_UNRESERVED_WHEN_NEEDED, + ActivityLevel.NODE); return ContainerAllocation.LOCALITY_SKIPPED; } } @@ -622,18 +637,20 @@ private ContainerAllocation assignContainer(Resource clusterResource, // Skip the locality request ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE - + getResourceDiagnostics(capability, availableForDC)); + ActivityDiagnosticConstant.NODE_DO_NOT_HAVE_SUFFICIENT_RESOURCE + + getResourceDiagnostics(capability, availableForDC), + ActivityLevel.NODE); return ContainerAllocation.LOCALITY_SKIPPED; } } ActivitiesLogger.APP.recordAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE + ActivityDiagnosticConstant.NODE_DO_NOT_HAVE_SUFFICIENT_RESOURCE + getResourceDiagnostics(capability, availableForDC), rmContainer == null ? - ActivityState.RESERVED : ActivityState.RE_RESERVED); + ActivityState.RESERVED : ActivityState.RE_RESERVED, + ActivityLevel.NODE); ContainerAllocation result = new ContainerAllocation(null, pendingAsk.getPerAllocationResource(), AllocationState.RESERVED); result.containerNodeType = type; @@ -643,8 +660,9 @@ private ContainerAllocation assignContainer(Resource clusterResource, // Skip the locality request ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE - + getResourceDiagnostics(capability, availableForDC)); + ActivityDiagnosticConstant.NODE_DO_NOT_HAVE_SUFFICIENT_RESOURCE + + getResourceDiagnostics(capability, availableForDC), + ActivityLevel.NODE); return ContainerAllocation.LOCALITY_SKIPPED; } } @@ -719,7 +737,8 @@ private ContainerAllocation handleNewContainerAllocation( null, AllocationState.APP_SKIPPED); ActivitiesLogger.APP.recordAppActivityWithoutAllocation(activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.FAIL_TO_ALLOCATE, ActivityState.REJECTED); + ActivityDiagnosticConstant.APPLICATION_FAIL_TO_ALLOCATE, + ActivityState.REJECTED, ActivityLevel.APP); return ret; } @@ -741,8 +760,8 @@ ContainerAllocation doAllocation(ContainerAllocation allocationResult, LOG.warn("Couldn't get container for allocation!"); ActivitiesLogger.APP.recordAppActivityWithoutAllocation(activitiesManager, node, application, schedulerKey, - ActivityDiagnosticConstant.COULD_NOT_GET_CONTAINER, - ActivityState.REJECTED); + ActivityDiagnosticConstant.APPLICATION_COULD_NOT_GET_CONTAINER, + ActivityState.REJECTED, ActivityLevel.APP); return ContainerAllocation.APP_SKIPPED; } @@ -765,8 +784,8 @@ ContainerAllocation doAllocation(ContainerAllocation allocationResult, .recordAppActivityWithoutAllocation(activitiesManager, node, application, schedulerKey, ActivityDiagnosticConstant. - PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST, - ActivityState.REJECTED); + REQUEST_SKIPPED_BECAUSE_NULL_ANY_REQUEST, + ActivityState.REJECTED, ActivityLevel.REQUEST); return ContainerAllocation.PRIORITY_SKIPPED; } updatedContainer = new RMContainerImpl(container, schedulerKey, @@ -827,8 +846,8 @@ private ContainerAllocation allocate(Resource clusterResource, if (schedulingPS == null) { ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, null, application, schedulerKey, - ActivityDiagnosticConstant. - APPLICATION_PRIORITY_DO_NOT_NEED_RESOURCE); + ActivityDiagnosticConstant.REQUEST_SKIPPED_BECAUSE_NULL_ANY_REQUEST, + ActivityLevel.REQUEST); return new ContainerAllocation(reservedContainer, null, AllocationState.PRIORITY_SKIPPED); } @@ -888,7 +907,8 @@ public CSAssignment assignContainers(Resource clusterResource, } ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation( activitiesManager, node, application, null, - ActivityDiagnosticConstant.APPLICATION_DO_NOT_NEED_RESOURCE); + ActivityDiagnosticConstant.APPLICATION_DO_NOT_NEED_RESOURCE, + ActivityLevel.APP); return CSAssignment.SKIP_ASSIGNMENT; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/JAXBContextResolver.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/JAXBContextResolver.java index f3e1a08d95..e0c6647f31 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/JAXBContextResolver.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/JAXBContextResolver.java @@ -55,7 +55,8 @@ public JAXBContextResolver() throws Exception { UsersInfo.class, UserInfo.class, ApplicationStatisticsInfo.class, StatisticsItemInfo.class, CapacitySchedulerHealthInfo.class, FairSchedulerQueueInfoList.class, AppTimeoutsInfo.class, - AppTimeoutInfo.class, ResourceInformationsInfo.class }; + AppTimeoutInfo.class, ResourceInformationsInfo.class, + ActivitiesInfo.class, AppActivitiesInfo.class}; // these dao classes need root unwrapping final Class[] rootUnwrappedTypes = { NewApplication.class, ApplicationSubmissionContextInfo.class, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java index 762569fa6b..aa29ee6dc3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java @@ -36,6 +36,7 @@ import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.ConcurrentMap; +import java.util.stream.Collectors; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; @@ -756,11 +757,29 @@ public AppActivitiesInfo getAppActivities(@Context HttpServletRequest hsr, Set requiredActions; try { - requiredActions = parseAppActivitiesRequiredActions(actions); + requiredActions = + parseAppActivitiesRequiredActions(getFlatSet(actions)); } catch (IllegalArgumentException e) { return new AppActivitiesInfo(e.getMessage(), appId); } + Set parsedRequestPriorities; + try { + parsedRequestPriorities = getFlatSet(requestPriorities).stream() + .map(e -> Integer.valueOf(e)).collect(Collectors.toSet()); + } catch (NumberFormatException e) { + return new AppActivitiesInfo("request priorities must be integers!", + appId); + } + Set parsedAllocationRequestIds; + try { + parsedAllocationRequestIds = getFlatSet(allocationRequestIds).stream() + .map(e -> Long.valueOf(e)).collect(Collectors.toSet()); + } catch (NumberFormatException e) { + return new AppActivitiesInfo( + "allocation request Ids must be integers!", appId); + } + int limitNum = -1; if (limit != null) { try { @@ -795,12 +814,13 @@ public AppActivitiesInfo getAppActivities(@Context HttpServletRequest hsr, if (requiredActions .contains(RMWSConsts.AppActivitiesRequiredAction.GET)) { AppActivitiesInfo appActivitiesInfo = activitiesManager - .getAppActivitiesInfo(applicationId, requestPriorities, - allocationRequestIds, activitiesGroupBy, limitNum, + .getAppActivitiesInfo(applicationId, parsedRequestPriorities, + parsedAllocationRequestIds, activitiesGroupBy, limitNum, summarize, maxTime); return appActivitiesInfo; } - return new AppActivitiesInfo("Successfully notified actions: " + return new AppActivitiesInfo("Successfully received " + + (actions.size() == 1 ? "action: " : "actions: ") + StringUtils.join(',', actions), appId); } catch (Exception e) { String errMessage = "Cannot find application with given appId"; @@ -812,6 +832,15 @@ public AppActivitiesInfo getAppActivities(@Context HttpServletRequest hsr, return null; } + private Set getFlatSet(Set set) { + if (set == null) { + return null; + } + return set.stream() + .flatMap(e -> Arrays.asList(e.split(StringUtils.COMMA_STR)).stream()) + .collect(Collectors.toSet()); + } + private Set parseAppActivitiesRequiredActions(Set actions) { Set requiredActions = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ActivitiesInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ActivitiesInfo.java index 4ea4cd1e16..668f33bcce 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ActivitiesInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ActivitiesInfo.java @@ -32,16 +32,17 @@ import java.util.List; import java.util.ArrayList; -/* - * DAO object to display node allocation activity. +/** + * DAO object to display allocation activities. */ -@XmlRootElement +@XmlRootElement(name = "activities") @XmlAccessorType(XmlAccessType.FIELD) public class ActivitiesInfo { - protected String nodeId; - protected String timeStamp; - protected String diagnostic = null; - protected List allocations; + private String nodeId; + private Long timestamp; + private String dateTime; + private String diagnostic; + private List allocations; private static final Logger LOG = LoggerFactory.getLogger(ActivitiesInfo.class); @@ -73,9 +74,10 @@ public ActivitiesInfo(List nodeAllocations, String nodeId, this.nodeId = nodeAllocations.get(0).getNodeId().toString(); } + this.timestamp = nodeAllocations.get(0).getTimestamp(); Date date = new Date(); - date.setTime(nodeAllocations.get(0).getTimeStamp()); - this.timeStamp = date.toString(); + date.setTime(this.timestamp); + this.dateTime = date.toString(); for (int i = 0; i < nodeAllocations.size(); i++) { NodeAllocation nodeAllocation = nodeAllocations.get(i); @@ -86,4 +88,24 @@ public ActivitiesInfo(List nodeAllocations, String nodeId, } } } + + public String getNodeId() { + return nodeId; + } + + public Long getTimestamp() { + return timestamp; + } + + public String getDateTime() { + return dateTime; + } + + public String getDiagnostic() { + return diagnostic; + } + + public List getAllocations() { + return allocations; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ActivityNodeInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ActivityNodeInfo.java index b6e0a533b2..257147743f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ActivityNodeInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ActivityNodeInfo.java @@ -38,16 +38,16 @@ @XmlRootElement @XmlAccessorType(XmlAccessType.FIELD) public class ActivityNodeInfo { - protected String name; // The name for activity node - protected String appPriority; - protected String requestPriority; - protected String allocationState; - protected String diagnostic; + private String name; // The name for activity node + private Integer appPriority; + private Integer requestPriority; + private Long allocationRequestId; + private String allocationState; + private String diagnostic; private String nodeId; - private String allocationRequestId; // Used for groups of activities - private String count; + private Integer count; private List nodeIds; protected List children; @@ -55,19 +55,19 @@ public class ActivityNodeInfo { ActivityNodeInfo() { } - public ActivityNodeInfo(String name, ActivityState allocationState, + public ActivityNodeInfo(String name, ActivityState activityState, String diagnostic, NodeId nId) { this.name = name; - this.allocationState = allocationState.name(); + this.allocationState = activityState.name(); this.diagnostic = diagnostic; setNodeId(nId); } - public ActivityNodeInfo(ActivityState groupAllocationState, + public ActivityNodeInfo(ActivityState groupActivityState, String groupDiagnostic, List groupNodeIds) { - this.allocationState = groupAllocationState.name(); + this.allocationState = groupActivityState.name(); this.diagnostic = groupDiagnostic; - this.count = String.valueOf(groupNodeIds.size()); + this.count = groupNodeIds.size(); this.nodeIds = groupNodeIds; } @@ -113,11 +113,11 @@ public void setNodeIds(List nodeIds) { this.nodeIds = nodeIds; } - public String getAllocationRequestId() { + public Long getAllocationRequestId() { return allocationRequestId; } - public String getCount() { + public Integer getCount() { return count; } @@ -128,4 +128,24 @@ public List getNodeIds() { public List getChildren() { return children; } + + public String getAllocationState() { + return allocationState; + } + + public String getName() { + return name; + } + + public Integer getAppPriority() { + return appPriority; + } + + public Integer getRequestPriority() { + return requestPriority; + } + + public String getDiagnostic() { + return diagnostic; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppActivitiesInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppActivitiesInfo.java index c2777499ac..16d75e3bff 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppActivitiesInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppActivitiesInfo.java @@ -33,16 +33,17 @@ import java.util.Date; import java.util.List; -/* +/** * DAO object to display application activity. */ -@XmlRootElement +@XmlRootElement(name = "appActivities") @XmlAccessorType(XmlAccessType.FIELD) public class AppActivitiesInfo { - protected String applicationId; - protected String diagnostic; - protected String timeStamp; - protected List allocations; + private String applicationId; + private String diagnostic; + private Long timestamp; + private String dateTime; + private List allocations; private static final Logger LOG = LoggerFactory.getLogger(AppActivitiesInfo.class); @@ -53,10 +54,7 @@ public AppActivitiesInfo() { public AppActivitiesInfo(String errorMessage, String applicationId) { this.diagnostic = errorMessage; this.applicationId = applicationId; - - Date date = new Date(); - date.setTime(SystemClock.getInstance().getTime()); - this.timeStamp = date.toString(); + setTime(SystemClock.getInstance().getTime()); } public AppActivitiesInfo(List appAllocations, @@ -67,10 +65,7 @@ public AppActivitiesInfo(List appAllocations, if (appAllocations == null) { diagnostic = "waiting for display"; - - Date date = new Date(); - date.setTime(SystemClock.getInstance().getTime()); - this.timeStamp = date.toString(); + setTime(SystemClock.getInstance().getTime()); } else { for (int i = appAllocations.size() - 1; i > -1; i--) { AppAllocation appAllocation = appAllocations.get(i); @@ -81,8 +76,29 @@ public AppActivitiesInfo(List appAllocations, } } + private void setTime(long ts) { + this.timestamp = ts; + this.dateTime = new Date(ts).toString(); + } + @VisibleForTesting public List getAllocations() { return allocations; } + + public Long getTimestamp() { + return timestamp; + } + + public String getDateTime() { + return dateTime; + } + + public String getApplicationId() { + return applicationId; + } + + public String getDiagnostic() { + return diagnostic; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAllocationInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAllocationInfo.java index 6ae1f9a819..1635e04d09 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAllocationInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAllocationInfo.java @@ -38,27 +38,27 @@ @XmlAccessorType(XmlAccessType.FIELD) public class AppAllocationInfo { private String nodeId; - private String queueName; - private String appPriority; - private long timestamp; + private Long timestamp; private String dateTime; + private String queueName; + private Integer appPriority; private String allocationState; private String diagnostic; - private List requestAllocation; + private List children; AppAllocationInfo() { } AppAllocationInfo(AppAllocation allocation, RMWSConsts.ActivitiesGroupBy groupBy) { - this.requestAllocation = new ArrayList<>(); + this.children = new ArrayList<>(); this.nodeId = allocation.getNodeId(); this.queueName = allocation.getQueueName(); this.appPriority = allocation.getPriority() == null ? - null : allocation.getPriority().toString(); + null : allocation.getPriority().getPriority(); this.timestamp = allocation.getTime(); this.dateTime = new Date(allocation.getTime()).toString(); - this.allocationState = allocation.getAppState().name(); + this.allocationState = allocation.getActivityState().name(); this.diagnostic = allocation.getDiagnostic(); Map> requestToActivityNodes = allocation.getAllocationAttempts().stream().collect(Collectors @@ -68,7 +68,7 @@ public class AppAllocationInfo { .values()) { AppRequestAllocationInfo requestAllocationInfo = new AppRequestAllocationInfo(requestActivityNodes, groupBy); - this.requestAllocation.add(requestAllocationInfo); + this.children.add(requestAllocationInfo); } } @@ -80,11 +80,11 @@ public String getQueueName() { return queueName; } - public String getAppPriority() { + public Integer getAppPriority() { return appPriority; } - public long getTimestamp() { + public Long getTimestamp() { return timestamp; } @@ -96,8 +96,8 @@ public String getAllocationState() { return allocationState; } - public List getRequestAllocation() { - return requestAllocation; + public List getChildren() { + return children; } public String getDiagnostic() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppRequestAllocationInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppRequestAllocationInfo.java index 09251283ae..f12f83d94b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppRequestAllocationInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppRequestAllocationInfo.java @@ -34,10 +34,11 @@ @XmlRootElement @XmlAccessorType(XmlAccessType.FIELD) public class AppRequestAllocationInfo { - private String requestPriority; - private String allocationRequestId; + private Integer requestPriority; + private Long allocationRequestId; private String allocationState; - private List allocationAttempt; + private String diagnostic; + private List children; AppRequestAllocationInfo() { } @@ -48,15 +49,19 @@ public class AppRequestAllocationInfo { this.requestPriority = lastActivityNode.getRequestPriority(); this.allocationRequestId = lastActivityNode.getAllocationRequestId(); this.allocationState = lastActivityNode.getState().name(); - this.allocationAttempt = ActivitiesUtils + if (lastActivityNode.isRequestType() + && lastActivityNode.getDiagnostic() != null) { + this.diagnostic = lastActivityNode.getDiagnostic(); + } + this.children = ActivitiesUtils .getRequestActivityNodeInfos(activityNodes, groupBy); } - public String getRequestPriority() { + public Integer getRequestPriority() { return requestPriority; } - public String getAllocationRequestId() { + public Long getAllocationRequestId() { return allocationRequestId; } @@ -64,7 +69,11 @@ public String getAllocationState() { return allocationState; } - public List getAllocationAttempt() { - return allocationAttempt; + public List getChildren() { + return children; + } + + public String getDiagnostic() { + return diagnostic; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeAllocationInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeAllocationInfo.java index 71c576d74c..8379d1d286 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeAllocationInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeAllocationInfo.java @@ -33,9 +33,10 @@ @XmlRootElement @XmlAccessorType(XmlAccessType.FIELD) public class NodeAllocationInfo { - protected String allocatedContainerId; - protected String finalAllocationState; - protected ActivityNodeInfo root = null; + private String partition; + private String updatedContainerId; + private String finalAllocationState; + private ActivityNodeInfo root = null; private static final Logger LOG = LoggerFactory.getLogger(NodeAllocationInfo.class); @@ -45,10 +46,25 @@ public class NodeAllocationInfo { NodeAllocationInfo(NodeAllocation allocation, RMWSConsts.ActivitiesGroupBy groupBy) { - this.allocatedContainerId = allocation.getContainerId(); + this.partition = allocation.getPartition(); + this.updatedContainerId = allocation.getContainerId(); this.finalAllocationState = allocation.getFinalAllocationState().name(); - root = new ActivityNodeInfo(allocation.getRoot(), groupBy); + } + public String getPartition() { + return partition; + } + + public String getUpdatedContainerId() { + return updatedContainerId; + } + + public String getFinalAllocationState() { + return finalAllocationState; + } + + public ActivityNodeInfo getRoot() { + return root; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/TestActivitiesManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/TestActivitiesManager.java index 35b8872634..9e3ff635ee 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/TestActivitiesManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/TestActivitiesManager.java @@ -150,10 +150,10 @@ public void testRecordingDifferentNodeActivitiesInMultiThreads() .recordAppActivityWithoutAllocation(activitiesManager, node, randomApp, new SchedulerRequestKey(Priority.newInstance(0), 0, null), - ActivityDiagnosticConstant.FAIL_TO_ALLOCATE, - ActivityState.REJECTED); + ActivityDiagnosticConstant.NODE_IS_BLACKLISTED, + ActivityState.REJECTED, ActivityLevel.NODE); ActivitiesLogger.NODE - .finishNodeUpdateRecording(activitiesManager, node.getNodeID()); + .finishNodeUpdateRecording(activitiesManager, node.getNodeID(), ""); return null; }; futures.add(threadPoolExecutor.submit(task)); @@ -195,10 +195,10 @@ public void testRecordingSchedulerActivitiesForMultiNodesInMultiThreads() .recordAppActivityWithoutAllocation(activitiesManager, node, randomApp, new SchedulerRequestKey(Priority.newInstance(0), 0, null), - ActivityDiagnosticConstant.FAIL_TO_ALLOCATE, - ActivityState.REJECTED); + ActivityDiagnosticConstant.NODE_IS_BLACKLISTED, + ActivityState.REJECTED, ActivityLevel.NODE); ActivitiesLogger.NODE.finishNodeUpdateRecording(activitiesManager, - ActivitiesManager.EMPTY_NODE_ID); + ActivitiesManager.EMPTY_NODE_ID, ""); return null; }; futures.add(threadPoolExecutor.submit(task)); @@ -236,13 +236,13 @@ public void testRecordingAppActivitiesInMultiThreads() .recordAppActivityWithoutAllocation(activitiesManager, node, randomApp, new SchedulerRequestKey(Priority.newInstance(0), 0, null), - ActivityDiagnosticConstant.FAIL_TO_ALLOCATE, - ActivityState.REJECTED); + ActivityDiagnosticConstant.NODE_IS_BLACKLISTED, + ActivityState.REJECTED, ActivityLevel.NODE); } ActivitiesLogger.APP - .finishAllocatedAppAllocationRecording(activitiesManager, - randomApp.getApplicationId(), null, ActivityState.SKIPPED, - ActivityDiagnosticConstant.SKIPPED_ALL_PRIORITIES); + .finishSkippedAppAllocationRecording(activitiesManager, + randomApp.getApplicationId(), ActivityState.SKIPPED, + ActivityDiagnosticConstant.EMPTY); return null; }; futures.add(threadPoolExecutor.submit(task)); @@ -285,12 +285,12 @@ public void testAppActivitiesTTL() throws Exception { ActivitiesLogger.APP .recordAppActivityWithoutAllocation(newActivitiesManager, node, app, new SchedulerRequestKey(Priority.newInstance(0), 0, null), - ActivityDiagnosticConstant.FAIL_TO_ALLOCATE, - ActivityState.REJECTED); + ActivityDiagnosticConstant.NODE_IS_BLACKLISTED, + ActivityState.REJECTED, ActivityLevel.NODE); ActivitiesLogger.APP - .finishAllocatedAppAllocationRecording(newActivitiesManager, - app.getApplicationId(), null, ActivityState.SKIPPED, - ActivityDiagnosticConstant.SKIPPED_ALL_PRIORITIES); + .finishSkippedAppAllocationRecording(newActivitiesManager, + app.getApplicationId(), ActivityState.SKIPPED, + ActivityDiagnosticConstant.EMPTY); } AppActivitiesInfo appActivitiesInfo = newActivitiesManager .getAppActivitiesInfo(app.getApplicationId(), null, null, null, -1, @@ -322,15 +322,15 @@ public void testAppActivitiesPerformance() { for (int i = 0; i < numNodes; i++) { NodeId nodeId = NodeId.newInstance("host" + i, 0); activitiesManager - .addSchedulingActivityForApp(app.getApplicationId(), null, "0", + .addSchedulingActivityForApp(app.getApplicationId(), null, 0, ActivityState.SKIPPED, - ActivityDiagnosticConstant.FAIL_TO_ALLOCATE, "container", - nodeId, "0"); + ActivityDiagnosticConstant.NODE_IS_BLACKLISTED, + ActivityLevel.NODE, nodeId, 0L); } ActivitiesLogger.APP - .finishAllocatedAppAllocationRecording(activitiesManager, - app.getApplicationId(), null, ActivityState.SKIPPED, - ActivityDiagnosticConstant.SKIPPED_ALL_PRIORITIES); + .finishSkippedAppAllocationRecording(activitiesManager, + app.getApplicationId(), ActivityState.SKIPPED, + ActivityDiagnosticConstant.EMPTY); } // It often take a longer time for the first query, ignore this distraction @@ -346,11 +346,11 @@ public void testAppActivitiesPerformance() { Assert.assertEquals(numActivities, appActivitiesInfo.getAllocations().size()); Assert.assertEquals(1, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() + appActivitiesInfo.getAllocations().get(0).getChildren() .size()); Assert.assertEquals(numNodes, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() - .get(0).getAllocationAttempt().size()); + appActivitiesInfo.getAllocations().get(0).getChildren() + .get(0).getChildren().size()); return null; }; testManyTimes("Getting normal app activities", normalSupplier, @@ -364,14 +364,14 @@ public void testAppActivitiesPerformance() { Assert.assertEquals(numActivities, appActivitiesInfo.getAllocations().size()); Assert.assertEquals(1, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() + appActivitiesInfo.getAllocations().get(0).getChildren() .size()); Assert.assertEquals(1, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() - .get(0).getAllocationAttempt().size()); + appActivitiesInfo.getAllocations().get(0).getChildren() + .get(0).getChildren().size()); Assert.assertEquals(numNodes, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() - .get(0).getAllocationAttempt().get(0).getNodeIds().size()); + appActivitiesInfo.getAllocations().get(0).getChildren() + .get(0).getChildren().get(0).getNodeIds().size()); return null; }; testManyTimes("Getting aggregated app activities", aggregatedSupplier, @@ -384,14 +384,14 @@ public void testAppActivitiesPerformance() { RMWSConsts.ActivitiesGroupBy.DIAGNOSTIC, -1, true, 100); Assert.assertEquals(1, appActivitiesInfo.getAllocations().size()); Assert.assertEquals(1, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() + appActivitiesInfo.getAllocations().get(0).getChildren() .size()); Assert.assertEquals(1, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() - .get(0).getAllocationAttempt().size()); + appActivitiesInfo.getAllocations().get(0).getChildren() + .get(0).getChildren().size()); Assert.assertEquals(numNodes, - appActivitiesInfo.getAllocations().get(0).getRequestAllocation() - .get(0).getAllocationAttempt().get(0).getNodeIds().size()); + appActivitiesInfo.getAllocations().get(0).getChildren() + .get(0).getChildren().get(0).getNodeIds().size()); return null; }; testManyTimes("Getting summarized app activities", summarizedSupplier, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/ActivitiesTestUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/ActivitiesTestUtils.java index 666e5fe9a5..3c6db7d470 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/ActivitiesTestUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/ActivitiesTestUtils.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; +import com.google.common.collect.Lists; import com.sun.jersey.api.client.ClientResponse; import com.sun.jersey.api.client.WebResource; import org.apache.hadoop.http.JettyUtils; @@ -43,6 +44,7 @@ import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; @@ -51,8 +53,8 @@ */ public final class ActivitiesTestUtils { - public static final String INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX = - ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE + public static final String TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX = + ActivityDiagnosticConstant.NODE_TOTAL_RESOURCE_INSUFFICIENT_FOR_REQUEST + ", " + GenericDiagnosticsCollector.RESOURCE_DIAGNOSTICS_PREFIX; public static final String UNMATCHED_PARTITION_OR_PC_DIAGNOSTIC_PREFIX = @@ -60,12 +62,51 @@ public final class ActivitiesTestUtils { NODE_DO_NOT_MATCH_PARTITION_OR_PLACEMENT_CONSTRAINTS + ", " + GenericDiagnosticsCollector.PLACEMENT_CONSTRAINT_DIAGNOSTICS_PREFIX; + /* + * Field names in response of scheduler/app activities. + */ + public static final String FN_ACT_ALLOCATIONS = "allocations"; + + public static final String FN_ACT_DIAGNOSTIC = "diagnostic"; + + public static final String FN_ACT_ALLOCATION_STATE = "allocationState"; + + public static final String FN_ACT_FINAL_ALLOCATION_STATE = + "finalAllocationState"; + + public static final String FN_ACT_NODE_ID = "nodeId"; + + public static final String FN_ACT_NODE_IDS = "nodeIds"; + + public static final String FN_ACT_COUNT = "count"; + + public static final String FN_ACT_APP_PRIORITY = "appPriority"; + + public static final String FN_ACT_REQUEST_PRIORITY = "requestPriority"; + + public static final String FN_ACT_ALLOCATION_REQUEST_ID = + "allocationRequestId"; + + public static final String FN_APP_ACT_CHILDREN = "children"; + + public static final String FN_APP_ACT_ROOT = "appActivities"; + + public static final String FN_SCHEDULER_ACT_NAME = "name"; + + public static final String FN_SCHEDULER_ACT_ALLOCATIONS_ROOT = "root"; + + public static final String FN_SCHEDULER_ACT_CHILDREN = "children"; + + public static final String FN_SCHEDULER_ACT_ROOT = "activities"; + private ActivitiesTestUtils(){} public static List findInAllocations(JSONObject allocationObj, Predicate p) throws JSONException { List target = new ArrayList<>(); - recursiveFindObj(allocationObj.getJSONObject("root"), p, target); + recursiveFindObj(allocationObj.getJSONObject( + FN_SCHEDULER_ACT_ALLOCATIONS_ROOT), p, + target); return target; } @@ -74,14 +115,14 @@ private static void recursiveFindObj(JSONObject obj, Predicate p, if (p.test(obj)) { target.add(obj); } - if (obj.has("children")) { - JSONArray childrenObjs = obj.optJSONArray("children"); + if (obj.has(FN_SCHEDULER_ACT_CHILDREN)) { + JSONArray childrenObjs = obj.optJSONArray(FN_SCHEDULER_ACT_CHILDREN); if (childrenObjs != null) { for (int i = 0; i < childrenObjs.length(); i++) { recursiveFindObj(childrenObjs.getJSONObject(i), p, target); } } else { - JSONObject childrenObj = obj.optJSONObject("children"); + JSONObject childrenObj = obj.optJSONObject(FN_SCHEDULER_ACT_CHILDREN); recursiveFindObj(childrenObj, p, target); } } @@ -103,17 +144,18 @@ public static SchedulingRequest schedulingRequest(int numContainers, public static void verifyNumberOfNodes(JSONObject allocation, int expectValue) throws Exception { - if (allocation.isNull("root")) { + if (allocation.isNull(FN_SCHEDULER_ACT_ALLOCATIONS_ROOT)) { assertEquals("State of allocation is wrong", expectValue, 0); } else { assertEquals("State of allocation is wrong", expectValue, - 1 + getNumberOfNodes(allocation.getJSONObject("root"))); + 1 + getNumberOfNodes( + allocation.getJSONObject(FN_SCHEDULER_ACT_ALLOCATIONS_ROOT))); } } public static int getNumberOfNodes(JSONObject allocation) throws Exception { - if (!allocation.isNull("children")) { - Object object = allocation.get("children"); + if (!allocation.isNull(FN_SCHEDULER_ACT_CHILDREN)) { + Object object = allocation.get(FN_SCHEDULER_ACT_CHILDREN); if (object.getClass() == JSONObject.class) { return 1 + getNumberOfNodes((JSONObject) object); } else { @@ -137,10 +179,18 @@ public static void verifyStateOfAllocations(JSONObject allocation, public static void verifyNumberOfAllocations(JSONObject json, int expectValue) throws Exception { - if (json.isNull("allocations")) { + JSONObject activitiesJson; + if (json.has(FN_APP_ACT_ROOT)) { + activitiesJson = json.getJSONObject(FN_APP_ACT_ROOT); + } else if (json.has(FN_SCHEDULER_ACT_ROOT)) { + activitiesJson = json.getJSONObject(FN_SCHEDULER_ACT_ROOT); + } else { + throw new IllegalArgumentException("Can't parse allocations!"); + } + if (activitiesJson.isNull(FN_ACT_ALLOCATIONS)) { assertEquals("Number of allocations is wrong", expectValue, 0); } else { - Object object = json.get("allocations"); + Object object = activitiesJson.get(FN_ACT_ALLOCATIONS); if (object.getClass() == JSONObject.class) { assertEquals("Number of allocations is wrong", expectValue, 1); } else if (object.getClass() == JSONArray.class) { @@ -153,31 +203,32 @@ public static void verifyNumberOfAllocations(JSONObject json, int expectValue) public static void verifyQueueOrder(JSONObject json, String expectOrder) throws Exception { String order = ""; - if (!json.isNull("root")) { - JSONObject root = json.getJSONObject("root"); - order = root.getString("name") + "-" + getQueueOrder(root); + if (!json.isNull(FN_SCHEDULER_ACT_ALLOCATIONS_ROOT)) { + JSONObject root = json.getJSONObject(FN_SCHEDULER_ACT_ALLOCATIONS_ROOT); + order = root.getString(FN_SCHEDULER_ACT_NAME) + "-" + getQueueOrder(root); } assertEquals("Order of queue is wrong", expectOrder, order.substring(0, order.length() - 1)); } public static String getQueueOrder(JSONObject node) throws Exception { - if (!node.isNull("children")) { - Object children = node.get("children"); + if (!node.isNull(FN_SCHEDULER_ACT_CHILDREN)) { + Object children = node.get(FN_SCHEDULER_ACT_CHILDREN); if (children.getClass() == JSONObject.class) { - if (!((JSONObject) children).isNull("appPriority")) { + if (!((JSONObject) children).isNull(FN_ACT_APP_PRIORITY)) { return ""; } - return ((JSONObject) children).getString("name") + "-" + getQueueOrder( - (JSONObject) children); + return ((JSONObject) children).getString(FN_SCHEDULER_ACT_NAME) + "-" + + getQueueOrder((JSONObject) children); } else if (children.getClass() == JSONArray.class) { String order = ""; for (int i = 0; i < ((JSONArray) children).length(); i++) { JSONObject child = (JSONObject) ((JSONArray) children).get(i); - if (!child.isNull("appPriority")) { + if (!child.isNull(FN_ACT_APP_PRIORITY)) { return ""; } - order += (child.getString("name") + "-" + getQueueOrder(child)); + order += (child.getString(FN_SCHEDULER_ACT_NAME) + "-" + + getQueueOrder(child)); } return order; } @@ -185,12 +236,52 @@ public static String getQueueOrder(JSONObject node) throws Exception { return ""; } + public static JSONObject getFirstSubNodeFromJson(JSONObject json, + String... hierarchicalFieldNames) { + return getSubNodesFromJson(json, hierarchicalFieldNames).get(0); + } + + public static List getSubNodesFromJson(JSONObject json, + String... hierarchicalFieldNames) { + List results = Lists.newArrayList(json); + for (String fieldName : hierarchicalFieldNames) { + results = results.stream().filter(e -> e.has(fieldName)) + .flatMap(e -> getJSONObjects(e, fieldName).stream()) + .collect(Collectors.toList()); + if (results.isEmpty()) { + throw new IllegalArgumentException("Can't find hierarchical fields " + + Arrays.toString(hierarchicalFieldNames)); + } + } + return results; + } + + private static List getJSONObjects(JSONObject json, + String fieldName) { + List objects = new ArrayList<>(); + if (json.has(fieldName)) { + try { + Object tmpObj = json.get(fieldName); + if (tmpObj.getClass() == JSONObject.class) { + objects.add((JSONObject) tmpObj); + } else if (tmpObj.getClass() == JSONArray.class) { + for (int i = 0; i < ((JSONArray) tmpObj).length(); i++) { + objects.add(((JSONArray) tmpObj).getJSONObject(i)); + } + } + } catch (JSONException e) { + throw new RuntimeException(e); + } + } + return objects; + } + public static void verifyNumberOfAllocationAttempts(JSONObject allocation, int expectValue) throws Exception { - if (allocation.isNull("allocationAttempt")) { + if (allocation.isNull(FN_APP_ACT_CHILDREN)) { assertEquals("Number of allocation attempts is wrong", expectValue, 0); } else { - Object object = allocation.get("allocationAttempt"); + Object object = allocation.get(FN_APP_ACT_CHILDREN); if (object.getClass() == JSONObject.class) { assertEquals("Number of allocations attempts is wrong", expectValue, 1); } else if (object.getClass() == JSONArray.class) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesForCSWithPartitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesForCSWithPartitions.java index 95dffce227..46b809c314 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesForCSWithPartitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesForCSWithPartitions.java @@ -18,26 +18,47 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATION_STATE; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATIONS; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_DIAGNOSTIC; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_NAME; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_ROOT; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.getFirstSubNodeFromJson; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfAllocations; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.StringReader; +import java.util.Arrays; import java.util.HashSet; +import java.util.List; import java.util.Set; +import java.util.function.Predicate; import javax.ws.rs.core.MediaType; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Sets; import org.apache.hadoop.http.JettyUtils; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeLabel; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.MockAM; +import org.apache.hadoop.yarn.server.resourcemanager.MockNM; import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityDiagnosticConstant; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityState; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; +import org.apache.hadoop.yarn.util.resource.Resources; import org.apache.hadoop.yarn.webapp.GenericExceptionHandler; import org.apache.hadoop.yarn.webapp.GuiceServletConfig; import org.apache.hadoop.yarn.webapp.JerseyTestBase; @@ -242,6 +263,82 @@ public void testSchedulerPartitionsXML() throws JSONException, Exception { verifySchedulerInfoXML(dom); } + @Test + public void testPartitionInSchedulerActivities() throws Exception { + rm.start(); + rm.getRMContext().getNodeLabelManager().addLabelsToNode(ImmutableMap + .of(NodeId.newInstance("127.0.0.1", 0), Sets.newHashSet(LABEL_LX))); + + MockNM nm1 = new MockNM("127.0.0.1:1234", 2 * 1024, + rm.getResourceTrackerService()); + MockNM nm2 = new MockNM("127.0.0.2:1234", 2 * 1024, + rm.getResourceTrackerService()); + nm1.registerNode(); + nm2.registerNode(); + + try { + RMApp app1 = rm.submitApp(1024, "app1", "user1", null, QUEUE_B, LABEL_LX); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1); + am1.allocate(Arrays.asList( + ResourceRequest.newBuilder().priority(Priority.UNDEFINED) + .resourceName("*").nodeLabelExpression(LABEL_LX) + .capability(Resources.createResource(2048)).numContainers(1) + .build()), null); + + WebResource sr = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH) + .path(RMWSConsts.SCHEDULER_ACTIVITIES); + ActivitiesTestUtils.requestWebResource(sr, null); + + nm1.nodeHeartbeat(true); + Thread.sleep(1000); + + JSONObject schedulerActivitiesJson = + ActivitiesTestUtils.requestWebResource(sr, null); + + /* + * verify scheduler activities + */ + verifyNumberOfAllocations(schedulerActivitiesJson, 1); + // verify queue Qb + Predicate findQueueBPred = + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals(QUEUE_B); + List queueBObj = ActivitiesTestUtils.findInAllocations( + getFirstSubNodeFromJson(schedulerActivitiesJson, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS), findQueueBPred); + assertEquals(1, queueBObj.size()); + assertEquals(ActivityState.REJECTED.name(), + queueBObj.get(0).optString(FN_ACT_ALLOCATION_STATE)); + assertEquals(ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM + + " from " + am1.getApplicationAttemptId().getApplicationId(), + queueBObj.get(0).optString(FN_ACT_DIAGNOSTIC)); + // verify queue Qa + Predicate findQueueAPred = + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals(QUEUE_A); + List queueAObj = ActivitiesTestUtils.findInAllocations( + getFirstSubNodeFromJson(schedulerActivitiesJson, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS), findQueueAPred); + assertEquals(1, queueAObj.size()); + assertEquals(ActivityState.REJECTED.name(), + queueAObj.get(0).optString(FN_ACT_ALLOCATION_STATE)); + assertEquals( + ActivityDiagnosticConstant.QUEUE_NOT_ABLE_TO_ACCESS_PARTITION, + queueAObj.get(0).optString(FN_ACT_DIAGNOSTIC)); + // verify queue Qc + Predicate findQueueCPred = + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals(QUEUE_C); + List queueCObj = ActivitiesTestUtils.findInAllocations( + getFirstSubNodeFromJson(schedulerActivitiesJson, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS), findQueueCPred); + assertEquals(1, queueCObj.size()); + assertEquals(ActivityState.SKIPPED.name(), + queueCObj.get(0).optString(FN_ACT_ALLOCATION_STATE)); + assertEquals(ActivityDiagnosticConstant.QUEUE_DO_NOT_NEED_MORE_RESOURCE, + queueCObj.get(0).optString(FN_ACT_DIAGNOSTIC)); + } finally { + rm.stop(); + } + } + private void verifySchedulerInfoXML(Document dom) throws Exception { NodeList scheduler = dom.getElementsByTagName("scheduler"); assertEquals("incorrect number of elements", 1, scheduler.getLength()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivities.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivities.java index 8bdecb769d..1942708b88 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivities.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivities.java @@ -22,6 +22,8 @@ import com.sun.jersey.api.client.WebResource; import com.sun.jersey.core.util.MultivaluedMapImpl; import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityDiagnosticConstant; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityState; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.http.JettyUtils; @@ -54,17 +56,37 @@ import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.NODE; import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.PlacementTargets.allocationTag; -import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATIONS; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATION_REQUEST_ID; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATION_STATE; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_DIAGNOSTIC; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_FINAL_ALLOCATION_STATE; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_NODE_ID; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_NODE_IDS; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_REQUEST_PRIORITY; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_APP_ACT_CHILDREN; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_APP_ACT_ROOT; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_CHILDREN; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_NAME; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_ALLOCATIONS_ROOT; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_ROOT; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.UNMATCHED_PARTITION_OR_PC_DIAGNOSTIC_PREFIX; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.getFirstSubNodeFromJson; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.getSubNodesFromJson; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfAllocationAttempts; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfAllocations; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfNodes; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyQueueOrder; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyStateOfAllocations; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +/** + * Tests for scheduler/app activities. + */ public class TestRMWebServicesSchedulerActivities extends TestRMWebServicesCapacitySched { @@ -117,9 +139,12 @@ public void testAssignMultipleContainersPerNodeHeartbeat() // Collection logic of scheduler activities changed after YARN-9313, // only one allocation should be recorded for all scenarios. verifyNumberOfAllocations(json, 1); - verifyStateOfAllocations(json.getJSONObject("allocations"), - "finalAllocationState", "ALLOCATED"); - verifyQueueOrder(json.getJSONObject("allocations"), "root-a-b-b2-b3-b1"); + JSONObject allocation = getFirstSubNodeFromJson(json, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS); + verifyStateOfAllocations(allocation, + FN_ACT_FINAL_ALLOCATION_STATE, "ALLOCATED"); + verifyQueueOrder(allocation, + "root-a-b-b2-b3-b1"); } finally { rm.stop(); } @@ -167,7 +192,14 @@ public void testAssignWithoutAvailableResource() throws Exception { response.getType().toString()); json = response.getEntity(JSONObject.class); - verifyNumberOfAllocations(json, 0); + // verify scheduler activities + verifyNumberOfAllocations(json, 1); + JSONObject rootObj = getFirstSubNodeFromJson(json, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS) + .getJSONObject(FN_SCHEDULER_ACT_ALLOCATIONS_ROOT); + assertTrue(rootObj.optString(FN_ACT_DIAGNOSTIC).startsWith( + ActivityDiagnosticConstant. + INIT_CHECK_SINGLE_NODE_RESOURCE_INSUFFICIENT)); } finally { rm.stop(); } @@ -301,10 +333,12 @@ public void testReserveNewContainer() throws Exception { verifyNumberOfAllocations(json, 1); - verifyQueueOrder(json.getJSONObject("allocations"), "root-a-b-b3-b1"); - - JSONObject allocations = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocations, "finalAllocationState", "RESERVED"); + JSONObject allocations = getFirstSubNodeFromJson(json, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS); + verifyQueueOrder(allocations, + "root-a-b-b3-b1"); + verifyStateOfAllocations(allocations, FN_ACT_FINAL_ALLOCATION_STATE, + "RESERVED"); // Do a node heartbeat again without releasing container from app2 r = resource(); @@ -329,10 +363,11 @@ public void testReserveNewContainer() throws Exception { verifyNumberOfAllocations(json, 1); - verifyQueueOrder(json.getJSONObject("allocations"), "b1"); - - allocations = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocations, "finalAllocationState", "SKIPPED"); + JSONObject allocation = getFirstSubNodeFromJson(json, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS); + verifyQueueOrder(allocation, "b1"); + verifyStateOfAllocations(allocation, FN_ACT_FINAL_ALLOCATION_STATE, + "RESERVED"); // Finish application 2 CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); @@ -365,10 +400,10 @@ public void testReserveNewContainer() throws Exception { verifyNumberOfAllocations(json, 1); - verifyQueueOrder(json.getJSONObject("allocations"), "b1"); - - allocations = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocations, "finalAllocationState", + allocations = getFirstSubNodeFromJson(json, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS); + verifyQueueOrder(allocations, "b1"); + verifyStateOfAllocations(allocations, FN_ACT_FINAL_ALLOCATION_STATE, "ALLOCATED_FROM_RESERVED"); } finally { rm.stop(); @@ -411,14 +446,15 @@ public void testActivityJSON() throws Exception { verifyNumberOfAllocations(json, 1); - JSONObject allocations = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocations, "finalAllocationState", + JSONObject allocation = getFirstSubNodeFromJson(json, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS); + verifyStateOfAllocations(allocation, FN_ACT_FINAL_ALLOCATION_STATE, "ALLOCATED"); // Increase number of nodes to 6 since request node has been added - verifyNumberOfNodes(allocations, 6); + verifyNumberOfNodes(allocation, 6); - verifyQueueOrder(json.getJSONObject("allocations"), "root-b-b1"); + verifyQueueOrder(allocation, "root-b-b1"); } finally { rm.stop(); } @@ -451,22 +487,27 @@ public void testAppActivityJSON() throws Exception { //Check app activities verifyNumberOfAllocations(json, 1); - JSONObject allocations = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocations, "allocationState", "ALLOCATED"); + JSONObject allocation = getFirstSubNodeFromJson(json, + FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS); + verifyStateOfAllocations(allocation, FN_ACT_ALLOCATION_STATE, + "ALLOCATED"); //Check request allocation JSONObject requestAllocationObj = - allocations.getJSONObject("requestAllocation"); - verifyStateOfAllocations(requestAllocationObj, "allocationState", + getFirstSubNodeFromJson(allocation, FN_APP_ACT_CHILDREN); + verifyStateOfAllocations(requestAllocationObj, FN_ACT_ALLOCATION_STATE, "ALLOCATED"); - assertEquals("0", requestAllocationObj.optString("requestPriority")); - assertEquals("-1", requestAllocationObj.optString("allocationRequestId")); + assertEquals(0, + requestAllocationObj.optInt(FN_ACT_REQUEST_PRIORITY)); + assertEquals(-1, + requestAllocationObj.optLong(FN_ACT_ALLOCATION_REQUEST_ID)); //Check allocation attempts verifyNumberOfAllocationAttempts(requestAllocationObj, 1); - JSONObject allocationAttemptObj = - requestAllocationObj.getJSONObject("allocationAttempt"); - verifyStateOfAllocations(allocationAttemptObj, "allocationState", - "ALLOCATED"); - assertNotNull(allocationAttemptObj.get("nodeId")); + List allocationAttempts = + getSubNodesFromJson(requestAllocationObj, FN_APP_ACT_CHILDREN); + assertEquals(1, allocationAttempts.size()); + verifyStateOfAllocations(allocationAttempts.get(0), + FN_ACT_ALLOCATION_STATE, "ALLOCATED"); + assertNotNull(allocationAttempts.get(0).get(FN_ACT_NODE_ID)); } finally { rm.stop(); } @@ -508,10 +549,11 @@ public void testAppAssignMultipleContainersPerNodeHeartbeat() verifyNumberOfAllocations(json, 10); - JSONArray allocations = json.getJSONArray("allocations"); - for (int i = 0; i < allocations.length(); i++) { - verifyStateOfAllocations(allocations.getJSONObject(i), - "allocationState", "ALLOCATED"); + List allocations = + getSubNodesFromJson(json, FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS); + for (int i = 0; i < allocations.size(); i++) { + verifyStateOfAllocations(allocations.get(i), + FN_ACT_ALLOCATION_STATE, "ALLOCATED"); } } finally { rm.stop(); @@ -643,8 +685,7 @@ public void testAppReserveNewContainer() throws Exception { } @Test (timeout=30000) - public void testInsufficientResourceDiagnostic() - throws Exception { + public void testInsufficientResourceDiagnostic() throws Exception { rm.start(); CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); @@ -664,7 +705,8 @@ public void testInsufficientResourceDiagnostic() response.getType().toString()); JSONObject json = response.getEntity(JSONObject.class); assertEquals("waiting for next allocation", - json.getString("diagnostic")); + getFirstSubNodeFromJson(json, FN_SCHEDULER_ACT_ROOT) + .optString(FN_ACT_DIAGNOSTIC)); am1.allocate(Arrays.asList(ResourceRequest .newInstance(Priority.UNDEFINED, "*", @@ -682,24 +724,26 @@ public void testInsufficientResourceDiagnostic() json = response.getEntity(JSONObject.class); verifyNumberOfAllocations(json, 1); - JSONObject allocationObj = json.getJSONObject("allocations"); + JSONObject allocationObj = getFirstSubNodeFromJson(json, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS); // check diagnostics Predicate findReqPred = - (obj) -> obj.optString("name").equals("request_-1_-1"); + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_-1_-1"); List app2ReqObjs = ActivitiesTestUtils.findInAllocations(allocationObj, findReqPred); assertEquals(1, app2ReqObjs.size()); - JSONObject reqChild = app2ReqObjs.get(0).getJSONObject("children"); - assertTrue(reqChild.getString("diagnostic") - .contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX)); + List reqAllocations = + getSubNodesFromJson(app2ReqObjs.get(0), FN_SCHEDULER_ACT_CHILDREN); + assertEquals(1, reqAllocations.size()); + assertTrue(reqAllocations.get(0).getString(FN_ACT_DIAGNOSTIC) + .contains(TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX)); } finally { rm.stop(); } } @Test (timeout=30000) - public void testPlacementConstraintDiagnostic() - throws Exception { + public void testPlacementConstraintDiagnostic() throws Exception { rm.start(); CapacityScheduler cs = (CapacityScheduler)rm.getResourceScheduler(); @@ -728,7 +772,8 @@ public void testPlacementConstraintDiagnostic() response.getType().toString()); JSONObject json = response.getEntity(JSONObject.class); assertEquals("waiting for next allocation", - json.getString("diagnostic")); + getFirstSubNodeFromJson(json, FN_SCHEDULER_ACT_ROOT) + .optString(FN_ACT_DIAGNOSTIC)); // trigger scheduling cs.handle(new NodeUpdateSchedulerEvent( @@ -742,15 +787,17 @@ public void testPlacementConstraintDiagnostic() json = response.getEntity(JSONObject.class); verifyNumberOfAllocations(json, 1); - JSONObject allocationObj = json.getJSONObject("allocations"); + JSONObject allocationObj = getFirstSubNodeFromJson(json, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS); // check diagnostics Predicate findReqPred = - (obj) -> obj.optString("name").equals("request_1_1"); + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_1_1"); List reqObjs = ActivitiesTestUtils.findInAllocations(allocationObj, findReqPred); assertEquals(1, reqObjs.size()); - JSONObject reqChild = reqObjs.get(0).getJSONObject("children"); - assertTrue(reqChild.getString("diagnostic") + JSONObject reqChild = + getFirstSubNodeFromJson(reqObjs.get(0), FN_SCHEDULER_ACT_CHILDREN); + assertTrue(reqChild.getString(FN_ACT_DIAGNOSTIC) .contains(UNMATCHED_PARTITION_OR_PC_DIAGNOSTIC_PREFIX)); } finally { rm.stop(); @@ -758,8 +805,7 @@ public void testPlacementConstraintDiagnostic() } @Test (timeout=30000) - public void testAppInsufficientResourceDiagnostic() - throws Exception { + public void testAppInsufficientResourceDiagnostic() throws Exception { rm.start(); CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); @@ -776,7 +822,7 @@ public void testAppInsufficientResourceDiagnostic() MultivaluedMapImpl params = new MultivaluedMapImpl(); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); assertEquals("waiting for display", - json.getString("diagnostic")); + json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC)); // am1 asks for 1 * 5GB container am1.allocate(Arrays.asList(ResourceRequest @@ -788,24 +834,24 @@ public void testAppInsufficientResourceDiagnostic() json = ActivitiesTestUtils.requestWebResource(r, params); verifyNumberOfAllocations(json, 1); - JSONObject allocationObj = json.getJSONObject("allocations"); + JSONObject allocationObj = getFirstSubNodeFromJson(json, + FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS); JSONObject requestAllocationObj = - allocationObj.getJSONObject("requestAllocation"); + getFirstSubNodeFromJson(allocationObj, FN_APP_ACT_CHILDREN); verifyNumberOfAllocationAttempts(requestAllocationObj, 1); - JSONObject allocationAttemptObj = - requestAllocationObj.getJSONObject("allocationAttempt"); - verifyStateOfAllocations(allocationAttemptObj, "allocationState", + JSONObject allocationAttemptObj = getFirstSubNodeFromJson( + requestAllocationObj, FN_APP_ACT_CHILDREN); + verifyStateOfAllocations(allocationAttemptObj, FN_ACT_ALLOCATION_STATE, "SKIPPED"); - assertTrue(allocationAttemptObj.optString("diagnostic") - .contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX)); + assertTrue(allocationAttemptObj.optString(FN_ACT_DIAGNOSTIC) + .contains(TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX)); } finally { rm.stop(); } } - @Test (timeout=30000) - public void testAppPlacementConstraintDiagnostic() - throws Exception { + @Test(timeout=30000) + public void testAppPlacementConstraintDiagnostic() throws Exception { rm.start(); CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); @@ -822,7 +868,7 @@ public void testAppPlacementConstraintDiagnostic() MultivaluedMapImpl params = new MultivaluedMapImpl(); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); assertEquals("waiting for display", - json.getString("diagnostic")); + json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC)); // am1 asks for 1 * 5GB container with PC expression: in,node,foo PlacementConstraint pcExpression = PlacementConstraints @@ -840,15 +886,16 @@ public void testAppPlacementConstraintDiagnostic() json = ActivitiesTestUtils.requestWebResource(r, params); verifyNumberOfAllocations(json, 1); - JSONObject allocationObj = json.getJSONObject("allocations"); + JSONObject allocationObj = getFirstSubNodeFromJson(json, + FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS); JSONObject requestAllocationObj = - allocationObj.getJSONObject("requestAllocation"); + getFirstSubNodeFromJson(allocationObj, FN_APP_ACT_CHILDREN); verifyNumberOfAllocationAttempts(requestAllocationObj, 1); - JSONObject allocationAttemptObj = - requestAllocationObj.getJSONObject("allocationAttempt"); - verifyStateOfAllocations(allocationAttemptObj, "allocationState", + JSONObject allocationAttemptObj = getFirstSubNodeFromJson( + requestAllocationObj, FN_APP_ACT_CHILDREN); + verifyStateOfAllocations(allocationAttemptObj, FN_ACT_ALLOCATION_STATE, "SKIPPED"); - assertTrue(allocationAttemptObj.optString("diagnostic") + assertTrue(allocationAttemptObj.optString(FN_ACT_DIAGNOSTIC) .contains(UNMATCHED_PARTITION_OR_PC_DIAGNOSTIC_PREFIX)); } finally { rm.stop(); @@ -873,7 +920,7 @@ public void testAppFilterByRequestPrioritiesAndAllocationRequestIds() MultivaluedMapImpl params = new MultivaluedMapImpl(); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); assertEquals("waiting for display", - json.getString("diagnostic")); + json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC)); // am1 asks for 1 * 1GB container with requestPriority=-1 // and allocationRequestId=1 @@ -919,23 +966,22 @@ public void testAppFilterByRequestPrioritiesAndAllocationRequestIds() cs.handle(new NodeUpdateSchedulerEvent( rm.getRMContext().getRMNodes().get(nm1.getNodeId()))); - // query app activities with requestPriorities={0,1} + // query app activities with requestPriorities={0,-1} MultivaluedMapImpl filterParams1 = new MultivaluedMapImpl(params); - filterParams1.add(RMWSConsts.REQUEST_PRIORITIES, "-1"); - filterParams1.add(RMWSConsts.REQUEST_PRIORITIES, "0"); + filterParams1.add(RMWSConsts.REQUEST_PRIORITIES, "0,-1"); json = ActivitiesTestUtils.requestWebResource(r, filterParams1); verifyNumberOfAllocations(json, 4); - // query app activities with requestPriorities=0 + // query app activities with requestPriorities=-1 MultivaluedMapImpl filterParams2 = new MultivaluedMapImpl(params); filterParams2.add(RMWSConsts.REQUEST_PRIORITIES, "-1"); json = ActivitiesTestUtils.requestWebResource(r, filterParams2); verifyNumberOfAllocations(json, 2); - JSONArray allocations = json.getJSONArray("allocations"); + JSONArray allocations = + json.getJSONObject(FN_APP_ACT_ROOT).getJSONArray(FN_ACT_ALLOCATIONS); for (int i=0; i()); @@ -1016,19 +1085,20 @@ public void testAppLimit() throws Exception { // query all app activities with invalid limit params.putSingle(RMWSConsts.LIMIT, "STRING"); json = ActivitiesTestUtils.requestWebResource(r, params); - assertEquals("limit must be integer!", json.getString("diagnostic")); + assertEquals("limit must be integer!", + json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC)); // query all app activities with limit = 0 params.putSingle(RMWSConsts.LIMIT, "0"); json = ActivitiesTestUtils.requestWebResource(r, params); assertEquals("limit must be greater than 0!", - json.getString("diagnostic")); + json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC)); // query all app activities with limit < 0 params.putSingle(RMWSConsts.LIMIT, "-3"); json = ActivitiesTestUtils.requestWebResource(r, params); assertEquals("limit must be greater than 0!", - json.getString("diagnostic")); + json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC)); } finally { rm.stop(); } @@ -1052,17 +1122,18 @@ public void testAppActions() throws Exception { params.add("maxTime", 1); //only last for 1 second // testing invalid action - params.add(RMWSConsts.ACTIONS, "get"); - params.add(RMWSConsts.ACTIONS, "invalid-action"); + params.add(RMWSConsts.ACTIONS, "get,invalid-action"); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); - assertTrue(json.getString("diagnostic").startsWith("Got invalid action")); + assertTrue(json.getJSONObject(FN_APP_ACT_ROOT) + .getString(FN_ACT_DIAGNOSTIC).startsWith("Got invalid action")); /* * testing get action */ params.putSingle(RMWSConsts.ACTIONS, "get"); json = ActivitiesTestUtils.requestWebResource(r, params); - assertEquals("waiting for display", json.getString("diagnostic")); + assertEquals("waiting for display", + json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC)); // trigger scheduling cs.handle(new NodeUpdateSchedulerEvent( @@ -1071,7 +1142,8 @@ public void testAppActions() throws Exception { // app activities won't be recorded params.putSingle(RMWSConsts.ACTIONS, "get"); json = ActivitiesTestUtils.requestWebResource(r, params); - assertEquals("waiting for display", json.getString("diagnostic")); + assertEquals("waiting for display", + json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC)); // trigger scheduling cs.handle(new NodeUpdateSchedulerEvent( @@ -1082,8 +1154,8 @@ public void testAppActions() throws Exception { */ params.putSingle(RMWSConsts.ACTIONS, "refresh"); json = ActivitiesTestUtils.requestWebResource(r, params); - assertEquals("Successfully notified actions: refresh", - json.getString("diagnostic")); + assertEquals("Successfully received action: refresh", + json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC)); // trigger scheduling cs.handle(new NodeUpdateSchedulerEvent( @@ -1104,8 +1176,7 @@ public void testAppActions() throws Exception { * testing update and get actions */ params.remove(RMWSConsts.ACTIONS); - params.add(RMWSConsts.ACTIONS, "refresh"); - params.add(RMWSConsts.ACTIONS, "get"); + params.add(RMWSConsts.ACTIONS, "refresh,get"); json = ActivitiesTestUtils.requestWebResource(r, params); verifyNumberOfAllocations(json, 1); @@ -1149,7 +1220,7 @@ public void testAppSummary() throws Exception { MultivaluedMapImpl params = new MultivaluedMapImpl(); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); assertEquals("waiting for display", - json.getString("diagnostic")); + json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC)); MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1); // am1 asks for 1 * 5GB container @@ -1170,23 +1241,191 @@ public void testAppSummary() throws Exception { // verify that response contains an allocation summary for all nodes verifyNumberOfAllocations(json, 1); - JSONObject allocation = json.getJSONObject("allocations"); + JSONObject allocation = getFirstSubNodeFromJson(json, + FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS); JSONObject reqestAllocation = - allocation.getJSONObject("requestAllocation"); - JSONArray attempts = reqestAllocation.getJSONArray("allocationAttempt"); + getFirstSubNodeFromJson(allocation, FN_APP_ACT_CHILDREN); + JSONArray attempts = reqestAllocation.getJSONArray(FN_APP_ACT_CHILDREN); assertEquals(2, attempts.length()); for (int i = 0; i < attempts.length(); i++) { JSONObject attempt = attempts.getJSONObject(i); - if (attempt.getString("allocationState").equals("SKIPPED")) { - JSONArray nodeIds = attempt.optJSONArray("nodeIds"); - assertEquals(2, nodeIds.length()); - } else if (attempt.getString("allocationState").equals("RESERVED")) { + if (attempt.getString(FN_ACT_ALLOCATION_STATE) + .equals(ActivityState.SKIPPED.name())) { + assertEquals(2, attempt.getJSONArray(FN_ACT_NODE_IDS).length()); + } else if (attempt.getString(FN_ACT_ALLOCATION_STATE) + .equals(ActivityState.RESERVED.name())) { + assertEquals(1, attempt.getJSONArray(FN_ACT_NODE_IDS).length()); assertEquals(nm1.getNodeId().toString(), - attempt.getString("nodeIds")); + attempt.getJSONArray(FN_ACT_NODE_IDS).getString(0)); } } } finally { rm.stop(); } } + + @Test + public void testNodeSkippedBecauseOfRelaxLocality() throws Exception { + //Start RM so that it accepts app submissions + rm.start(); + + MockNM nm1 = new MockNM("127.0.0.1:1234", 4 * 1024, + rm.getResourceTrackerService()); + MockNM nm2 = new MockNM("127.0.0.2:1234", 4 * 1024, + rm.getResourceTrackerService()); + + nm1.registerNode(); + nm2.registerNode(); + + try { + RMApp app1 = rm.submitApp(10, "app1", "user1", null, "b1"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1); + + am1.allocate(Arrays.asList( + ResourceRequest.newBuilder().priority(Priority.UNDEFINED) + .resourceName("127.0.0.2") + .capability(Resources.createResource(1024)).numContainers(1) + .build(), + ResourceRequest.newBuilder().priority(Priority.UNDEFINED) + .resourceName("/default-rack") + .capability(Resources.createResource(1024)).numContainers(1) + .relaxLocality(false) + .build(), + ResourceRequest.newBuilder().priority(Priority.UNDEFINED) + .resourceName("*") + .capability(Resources.createResource(1024)).numContainers(1) + .relaxLocality(false) + .build()), null); + + WebResource r = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH) + .path(ActivitiesTestUtils.format(RMWSConsts.SCHEDULER_APP_ACTIVITIES, + app1.getApplicationId().toString())); + ActivitiesTestUtils.requestWebResource(r, null); + WebResource sr = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH) + .path(RMWSConsts.SCHEDULER_ACTIVITIES); + ActivitiesTestUtils.requestWebResource(sr, null); + + nm1.nodeHeartbeat(true); + Thread.sleep(1000); + + JSONObject appActivitiesJson = + ActivitiesTestUtils.requestWebResource(r, null); + JSONObject schedulerActivitiesJson = + ActivitiesTestUtils.requestWebResource(sr, null); + + // verify app activities + verifyNumberOfAllocations(appActivitiesJson, 1); + List allocationAttempts = ActivitiesTestUtils + .getSubNodesFromJson(appActivitiesJson, FN_APP_ACT_ROOT, + FN_ACT_ALLOCATIONS, FN_APP_ACT_CHILDREN, FN_APP_ACT_CHILDREN); + assertEquals(1, allocationAttempts.size()); + assertEquals( + ActivityDiagnosticConstant.NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY, + allocationAttempts.get(0).optString(FN_ACT_DIAGNOSTIC)); + + /* + * verify scheduler activities + */ + verifyNumberOfAllocations(schedulerActivitiesJson, 1); + // verify request activity + Predicate findA1AQueuePred = + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_-1_-1"); + List reqObjs = ActivitiesTestUtils.findInAllocations( + getFirstSubNodeFromJson(schedulerActivitiesJson, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS), + findA1AQueuePred); + assertEquals(1, reqObjs.size()); + assertEquals(ActivityState.SKIPPED.name(), + reqObjs.get(0).optString(FN_ACT_ALLOCATION_STATE)); + // verify node activity + JSONObject nodeObj = + getFirstSubNodeFromJson(reqObjs.get(0), FN_SCHEDULER_ACT_CHILDREN); + assertEquals(nm1.getNodeId().toString(), + nodeObj.optString(FN_ACT_NODE_ID)); + assertEquals( + ActivityDiagnosticConstant.NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY, + nodeObj.optString(FN_ACT_DIAGNOSTIC)); + } finally { + rm.stop(); + } + } + + @Test + public void testQueueSkippedBecauseOfHeadroom() throws Exception { + //Start RM so that it accepts app submissions + rm.start(); + + MockNM nm1 = new MockNM("127.0.0.1:1234", 4 * 1024, + rm.getResourceTrackerService()); + MockNM nm2 = new MockNM("127.0.0.2:1234", 4 * 1024, + rm.getResourceTrackerService()); + nm1.registerNode(); + nm2.registerNode(); + + try { + RMApp app1 = rm.submitApp(10, "app1", "user1", null, "a1a"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1); + + am1.allocate(Arrays.asList( + ResourceRequest.newBuilder().priority(Priority.UNDEFINED) + .resourceName("*").capability(Resources.createResource(3072)) + .numContainers(1).relaxLocality(false).build()), null); + + WebResource r = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH) + .path(ActivitiesTestUtils.format(RMWSConsts.SCHEDULER_APP_ACTIVITIES, + app1.getApplicationId().toString())); + ActivitiesTestUtils.requestWebResource(r, null); + WebResource sr = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH) + .path(RMWSConsts.SCHEDULER_ACTIVITIES); + ActivitiesTestUtils.requestWebResource(sr, null); + + + nm1.nodeHeartbeat(true); + Thread.sleep(1000); + + JSONObject appActivitiesJson = + ActivitiesTestUtils.requestWebResource(r, null); + JSONObject schedulerActivitiesJson = + ActivitiesTestUtils.requestWebResource(sr, null); + + // verify app activities: diagnostic should be attached at request level + // and there should be no allocation attempts at node level + verifyNumberOfAllocations(appActivitiesJson, 1); + List requestAllocations = ActivitiesTestUtils + .getSubNodesFromJson(appActivitiesJson, FN_APP_ACT_ROOT, + FN_ACT_ALLOCATIONS, FN_APP_ACT_CHILDREN); + assertEquals(1, requestAllocations.size()); + assertEquals(ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM, + requestAllocations.get(0).optString(FN_ACT_DIAGNOSTIC)); + assertFalse(requestAllocations.get(0).has(FN_APP_ACT_CHILDREN)); + + // verify scheduler activities: diagnostic should be attached at request + // level and queue level + verifyNumberOfAllocations(schedulerActivitiesJson, 1); + // verify at queue level + Predicate findA1AQueuePred = + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("a1a"); + List a1aQueueObj = ActivitiesTestUtils.findInAllocations( + getFirstSubNodeFromJson(schedulerActivitiesJson, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS), findA1AQueuePred); + assertEquals(1, a1aQueueObj.size()); + assertEquals(ActivityState.REJECTED.name(), + a1aQueueObj.get(0).optString(FN_ACT_ALLOCATION_STATE)); + assertTrue(a1aQueueObj.get(0).optString(FN_ACT_DIAGNOSTIC).startsWith( + ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM)); + // verify at request level + Predicate findReqPred = + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_-1_-1"); + List reqObj = ActivitiesTestUtils.findInAllocations( + getFirstSubNodeFromJson(schedulerActivitiesJson, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS), findReqPred); + assertEquals(1, reqObj.size()); + assertEquals(ActivityState.REJECTED.name(), + reqObj.get(0).optString(FN_ACT_ALLOCATION_STATE)); + assertTrue(reqObj.get(0).optString(FN_ACT_DIAGNOSTIC).startsWith( + ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM)); + } finally { + rm.stop(); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled.java index 8998221238..85b418ce6d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled.java @@ -57,8 +57,21 @@ import java.util.List; import java.util.function.Predicate; -import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATIONS; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATION_STATE; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_COUNT; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_DIAGNOSTIC; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_FINAL_ALLOCATION_STATE; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_NODE_IDS; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_APP_ACT_CHILDREN; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_APP_ACT_ROOT; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_CHILDREN; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_NAME; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_ROOT; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.findInAllocations; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.getFirstSubNodeFromJson; +import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.getSubNodesFromJson; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfAllocationAttempts; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfAllocations; import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyStateOfAllocations; @@ -184,9 +197,10 @@ public void testAssignContainer() throws Exception { verifyNumberOfAllocations(json, 1); - JSONObject allocations = json.getJSONObject("allocations"); + JSONObject allocations = getFirstSubNodeFromJson(json, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS); verifyStateOfAllocations(allocations, - "finalAllocationState", "ALLOCATED"); + FN_ACT_FINAL_ALLOCATION_STATE, "ALLOCATED"); } finally { rm.stop(); } @@ -225,9 +239,10 @@ public void testSchedulingWithoutPendingRequests() JSONObject json = response.getEntity(JSONObject.class); verifyNumberOfAllocations(json, 1); - JSONObject allocations = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocations, - "finalAllocationState", "SKIPPED"); + JSONObject allocation = getFirstSubNodeFromJson(json, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS); + verifyStateOfAllocations(allocation, + FN_ACT_FINAL_ALLOCATION_STATE, "SKIPPED"); } finally { rm.stop(); } @@ -254,7 +269,8 @@ public void testAppAssignContainer() throws Exception { app1.getApplicationId().toString())); MultivaluedMapImpl params = new MultivaluedMapImpl(); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); - assertEquals("waiting for display", json.getString("diagnostic")); + assertEquals("waiting for display", + json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC)); //Trigger scheduling for this app CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); @@ -267,22 +283,24 @@ public void testAppAssignContainer() throws Exception { verifyNumberOfAllocations(json, 1); - JSONObject allocationObj = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocationObj, "allocationState", "ALLOCATED"); + JSONObject allocationObj = getFirstSubNodeFromJson(json, + FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS); + verifyStateOfAllocations(allocationObj, FN_ACT_ALLOCATION_STATE, + "ALLOCATED"); JSONObject requestAllocationObj = - allocationObj.getJSONObject("requestAllocation"); + getFirstSubNodeFromJson(allocationObj, FN_APP_ACT_CHILDREN); verifyNumberOfAllocationAttempts(requestAllocationObj, 2); - verifyStateOfAllocations(requestAllocationObj, "allocationState", + verifyStateOfAllocations(requestAllocationObj, FN_ACT_ALLOCATION_STATE, "ALLOCATED"); JSONArray allocationAttemptArray = - requestAllocationObj.getJSONArray("allocationAttempt"); + requestAllocationObj.getJSONArray(FN_APP_ACT_CHILDREN); JSONObject allocationAttempt1 = allocationAttemptArray.getJSONObject(0); - verifyStateOfAllocations(allocationAttempt1, "allocationState", + verifyStateOfAllocations(allocationAttempt1, FN_ACT_ALLOCATION_STATE, "SKIPPED"); - assertTrue(allocationAttempt1.optString("diagnostic") - .contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX)); + assertTrue(allocationAttempt1.optString(FN_ACT_DIAGNOSTIC) + .contains(TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX)); JSONObject allocationAttempt2 = allocationAttemptArray.getJSONObject(1); - verifyStateOfAllocations(allocationAttempt2, "allocationState", + verifyStateOfAllocations(allocationAttempt2, FN_ACT_ALLOCATION_STATE, "ALLOCATED"); } finally { rm.stop(); @@ -313,7 +331,8 @@ public void testInsufficientResourceDiagnostic() throws Exception { assertEquals(MediaType.APPLICATION_JSON_TYPE + "; " + JettyUtils.UTF_8, response.getType().toString()); JSONObject json = response.getEntity(JSONObject.class); - assertEquals("waiting for next allocation", json.getString("diagnostic")); + assertEquals("waiting for next allocation", + json.getJSONObject(FN_SCHEDULER_ACT_ROOT).getString("diagnostic")); //Request a container for am2, will reserve a container on nm1 am2.allocate("*", 4096, 1, new ArrayList<>()); @@ -329,29 +348,32 @@ public void testInsufficientResourceDiagnostic() throws Exception { //Check app activities verifyNumberOfAllocations(json, 1); - JSONObject allocationObj = json.getJSONObject("allocations"); + JSONObject allocationObj = getFirstSubNodeFromJson(json, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS); //Check diagnostic for request of app1 - Predicate findApp1Pred = (obj) -> obj.optString("name") - .equals(app1.getApplicationId().toString()); + Predicate findApp1Pred = + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME) + .equals(app1.getApplicationId().toString()); JSONObject app1Obj = findInAllocations(allocationObj, findApp1Pred).get(0); - assertEquals("SKIPPED", app1Obj.optString("allocationState")); + assertEquals("SKIPPED", app1Obj.optString(FN_ACT_ALLOCATION_STATE)); assertEquals(ActivityDiagnosticConstant.APPLICATION_DO_NOT_NEED_RESOURCE, - app1Obj.optString("diagnostic")); + app1Obj.optString(FN_ACT_DIAGNOSTIC)); //Check diagnostic for request of app2 Predicate findApp2ReqPred = - (obj) -> obj.optString("name").equals("request_1_-1"); + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_1_-1"); List app2ReqObjs = findInAllocations(allocationObj, findApp2ReqPred); assertEquals(1, app2ReqObjs.size()); - JSONArray app2ReqChildren = app2ReqObjs.get(0).getJSONArray("children"); + JSONArray app2ReqChildren = + app2ReqObjs.get(0).getJSONArray(FN_SCHEDULER_ACT_CHILDREN); assertEquals(4, app2ReqChildren.length()); for (int i = 0; i < app2ReqChildren.length(); i++) { JSONObject reqChild = app2ReqChildren.getJSONObject(i); - if (reqChild.getString("allocationState").equals("SKIPPED")) { - String diagnostic = reqChild.getString("diagnostic"); - assertTrue( - diagnostic.contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX)); + if (reqChild.getString(FN_ACT_ALLOCATION_STATE).equals("SKIPPED")) { + String diagnostic = reqChild.getString(FN_ACT_DIAGNOSTIC); + assertTrue(diagnostic + .contains(TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX)); } } } finally { @@ -378,7 +400,8 @@ public void testAppInsufficientResourceDiagnostic() throws Exception { app1.getApplicationId().toString())); MultivaluedMapImpl params = new MultivaluedMapImpl(); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); - assertEquals("waiting for display", json.getString("diagnostic")); + assertEquals("waiting for display", + json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC)); //Request two containers with different priority for am1 am1.allocate(Arrays.asList(ResourceRequest @@ -398,32 +421,35 @@ public void testAppInsufficientResourceDiagnostic() throws Exception { //Check app activities json = ActivitiesTestUtils.requestWebResource(r, params); verifyNumberOfAllocations(json, 2); - JSONArray allocationArray = json.getJSONArray("allocations"); + JSONArray allocationArray = + json.getJSONObject(FN_APP_ACT_ROOT).getJSONArray(FN_ACT_ALLOCATIONS); //Check first activity is for second allocation with RESERVED state JSONObject allocationObj = allocationArray.getJSONObject(0); - verifyStateOfAllocations(allocationObj, "allocationState", "RESERVED"); + verifyStateOfAllocations(allocationObj, FN_ACT_ALLOCATION_STATE, + "RESERVED"); JSONObject requestAllocationObj = - allocationObj.getJSONObject("requestAllocation"); + getFirstSubNodeFromJson(allocationObj, FN_APP_ACT_CHILDREN); verifyNumberOfAllocationAttempts(requestAllocationObj, 4); JSONArray allocationAttemptArray = - requestAllocationObj.getJSONArray("allocationAttempt"); + requestAllocationObj.getJSONArray(FN_APP_ACT_CHILDREN); for (int i=0; i()); @@ -472,27 +500,29 @@ public void testGroupByDiagnostics() throws Exception { //Check activities verifyNumberOfAllocations(json, 1); - JSONObject allocationObj = json.getJSONObject("allocations"); + JSONObject allocationObj = getFirstSubNodeFromJson(json, + FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS); //Check diagnostic for request of app1 Predicate findReqPred = - (obj) -> obj.optString("name").equals("request_1_-1"); + (obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_1_-1"); List reqObjs = findInAllocations(allocationObj, findReqPred); assertEquals(1, reqObjs.size()); - JSONArray reqChildren = reqObjs.get(0).getJSONArray("children"); + JSONArray reqChildren = + reqObjs.get(0).getJSONArray(FN_SCHEDULER_ACT_CHILDREN); assertEquals(2, reqChildren.length()); for (int i = 0; i < reqChildren.length(); i++) { JSONObject reqChild = reqChildren.getJSONObject(i); - if (reqChild.getString("allocationState") + if (reqChild.getString(FN_ACT_ALLOCATION_STATE) .equals(AllocationState.SKIPPED.name())) { - assertEquals("3", reqChild.getString("count")); - assertEquals(3, reqChild.getJSONArray("nodeIds").length()); - assertTrue(reqChild.optString("diagnostic") - .contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX)); - } else if (reqChild.getString("allocationState") + assertEquals("3", reqChild.getString(FN_ACT_COUNT)); + assertEquals(3, reqChild.getJSONArray(FN_ACT_NODE_IDS).length()); + assertTrue(reqChild.optString(FN_ACT_DIAGNOSTIC) + .contains(TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX)); + } else if (reqChild.getString(FN_ACT_ALLOCATION_STATE) .equals(AllocationState.RESERVED.name())) { - assertEquals("1", reqChild.getString("count")); - assertNotNull(reqChild.getString("nodeIds")); + assertEquals("1", reqChild.getString(FN_ACT_COUNT)); + assertNotNull(reqChild.getString(FN_ACT_NODE_IDS)); } else { Assert.fail("Allocation state should be " + AllocationState.SKIPPED.name() + " or " @@ -528,7 +558,8 @@ public void testAppGroupByDiagnostics() throws Exception { */ params.add(RMWSConsts.GROUP_BY, "NON-EXIST-GROUP-BY"); JSONObject json = ActivitiesTestUtils.requestWebResource(r, params); - Assert.assertTrue(json.getString("diagnostic") + Assert.assertTrue(json.getJSONObject(FN_APP_ACT_ROOT) + .getString(FN_ACT_DIAGNOSTIC) .startsWith("Got invalid groupBy:")); params.remove(RMWSConsts.GROUP_BY); @@ -538,7 +569,8 @@ public void testAppGroupByDiagnostics() throws Exception { params.add(RMWSConsts.GROUP_BY, RMWSConsts.ActivitiesGroupBy. DIAGNOSTIC.name().toLowerCase()); json = ActivitiesTestUtils.requestWebResource(r, params); - assertEquals("waiting for display", json.getString("diagnostic")); + assertEquals("waiting for display", json.getJSONObject(FN_APP_ACT_ROOT) + .getString(FN_ACT_DIAGNOSTIC)); //Request two containers with different priority for am1 am1.allocate(Arrays.asList(ResourceRequest @@ -559,29 +591,31 @@ public void testAppGroupByDiagnostics() throws Exception { //Check app activities verifyNumberOfAllocations(json, 2); - JSONArray allocationArray = json.getJSONArray("allocations"); + List allocations = getSubNodesFromJson(json, + FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS); //Check first activity is for second allocation with RESERVED state - JSONObject allocationObj = allocationArray.getJSONObject(0); - verifyStateOfAllocations(allocationObj, "allocationState", "RESERVED"); + JSONObject allocationObj = allocations.get(0); + verifyStateOfAllocations(allocationObj, FN_ACT_ALLOCATION_STATE, + "RESERVED"); JSONObject requestAllocationObj = - allocationObj.getJSONObject("requestAllocation"); + getFirstSubNodeFromJson(allocationObj, FN_APP_ACT_CHILDREN); verifyNumberOfAllocationAttempts(requestAllocationObj, 2); JSONArray allocationAttemptArray = - requestAllocationObj.getJSONArray("allocationAttempt"); + requestAllocationObj.getJSONArray(FN_APP_ACT_CHILDREN); for (int i=0; i