From 09763925025a3709e6098186348e1afd80cb9f71 Mon Sep 17 00:00:00 2001 From: Weiwei Yang Date: Thu, 6 Jun 2019 19:55:03 +0800 Subject: [PATCH] YARN-9590. Correct incompatible, incomplete and redundant activities. Contributed by Tao Yang. --- .../scheduler/activities/ActivitiesLogger.java | 6 +++--- .../scheduler/activities/ActivitiesManager.java | 4 +++- .../scheduler/capacity/CapacityScheduler.java | 6 +++++- .../resourcemanager/scheduler/capacity/LeafQueue.java | 3 +++ .../allocator/AbstractContainerAllocator.java | 11 ++++------- .../capacity/allocator/RegularContainerAllocator.java | 8 +++++++- .../resourcemanager/webapp/dao/AppAllocationInfo.java | 6 ++++++ .../webapp/TestRMWebServicesSchedulerActivities.java | 4 ++-- ...vicesSchedulerActivitiesWithMultiNodesEnabled.java | 6 +++--- 9 files changed, 36 insertions(+), 18 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java index 58b6c613c9..e698d1a4d0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java @@ -102,20 +102,20 @@ public static void recordAppActivityWithoutAllocation( // Add application-container activity into specific node allocation. activitiesManager.addSchedulingActivityForNode(nodeId, requestName, null, - priorityStr, ActivityState.SKIPPED, diagnostic, type, + priorityStr, appState, diagnostic, type, null); type = "request"; // Add application-container activity into specific node allocation. activitiesManager.addSchedulingActivityForNode(nodeId, application.getApplicationId().toString(), requestName, - priorityStr, ActivityState.SKIPPED, + priorityStr, appState, ActivityDiagnosticConstant.EMPTY, type, allocationRequestId); } // Add queue-application activity into specific node allocation. activitiesManager.addSchedulingActivityForNode(nodeId, application.getQueueName(), application.getApplicationId().toString(), - application.getPriority().toString(), ActivityState.SKIPPED, + application.getPriority().toString(), appState, schedulerKey != null ? ActivityDiagnosticConstant.EMPTY : diagnostic, "app", null); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java index b8ef263747..2c314727c9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java @@ -339,8 +339,10 @@ void finishAppAllocationRecording(ApplicationId applicationId, appAllocations = curAppAllocations; } } - if (appAllocations.size() == appActivitiesMaxQueueLength) { + int curQueueLength = appAllocations.size(); + while (curQueueLength >= appActivitiesMaxQueueLength) { appAllocations.poll(); + --curQueueLength; } appAllocations.add(appAllocation); Long stopTime = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 57ee69026f..e59abee6b5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -1597,7 +1597,7 @@ private void allocateFromReservedContainer(FiCaSchedulerNode node, } else{ ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, queue.getParent().getQueueName(), queue.getQueueName(), - ActivityState.ACCEPTED, ActivityDiagnosticConstant.EMPTY); + ActivityState.SKIPPED, ActivityDiagnosticConstant.EMPTY); ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager, node, reservedContainer.getContainerId(), AllocationState.SKIPPED); } @@ -1687,6 +1687,10 @@ private CSAssignment allocateContainersOnMultiNodes( } LOG.debug("This node or this node partition doesn't have available or " + "killable resource"); + ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, null, + "", getRootQueue().getQueueName(), ActivityState.REJECTED, + ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION + " " + + candidates.getPartition()); return null; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index ca61dc6bad..a178f9e9a0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -1188,6 +1188,9 @@ public CSAssignment assignContainers(Resource clusterResource, application.updateNodeInfoForAMDiagnostics(node); } else if (assignment.getSkippedType() == CSAssignment.SkippedType.QUEUE_LIMIT) { + ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node, + getParent().getQueueName(), getQueueName(), ActivityState.SKIPPED, + ActivityDiagnosticConstant.QUEUE_SKIPPED_HEADROOM); return assignment; } else{ // If we don't allocate anything, and it is not skipped by application, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/AbstractContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/AbstractContainerAllocator.java index 829625220e..90b088efdf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/AbstractContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/AbstractContainerAllocator.java @@ -109,16 +109,10 @@ protected CSAssignment getCSAssignmentFromAllocateResult( allocatedResource); if (rmContainer != null) { - ActivitiesLogger.APP.recordAppActivityWithAllocation( - activitiesManager, node, application, updatedContainer, - ActivityState.RE_RESERVED); ActivitiesLogger.APP.finishSkippedAppAllocationRecording( activitiesManager, application.getApplicationId(), ActivityState.SKIPPED, ActivityDiagnosticConstant.EMPTY); } else { - ActivitiesLogger.APP.recordAppActivityWithAllocation( - activitiesManager, node, application, updatedContainer, - ActivityState.RESERVED); ActivitiesLogger.APP.finishAllocatedAppAllocationRecording( activitiesManager, application.getApplicationId(), updatedContainer.getContainerId(), ActivityState.RESERVED, @@ -149,7 +143,7 @@ protected CSAssignment getCSAssignmentFromAllocateResult( node, application, updatedContainer, ActivityState.ALLOCATED); ActivitiesLogger.APP.finishAllocatedAppAllocationRecording( activitiesManager, application.getApplicationId(), - updatedContainer.getContainerId(), ActivityState.ACCEPTED, + updatedContainer.getContainerId(), ActivityState.ALLOCATED, ActivityDiagnosticConstant.EMPTY); // Update unformed resource @@ -162,6 +156,9 @@ protected CSAssignment getCSAssignmentFromAllocateResult( assignment.setSkippedType( CSAssignment.SkippedType.QUEUE_LIMIT); } + ActivitiesLogger.APP.finishSkippedAppAllocationRecording( + activitiesManager, application.getApplicationId(), + ActivityState.SKIPPED, ActivityDiagnosticConstant.EMPTY); } return assignment; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java index 1f9f6eb7ee..2643fd0b7a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java @@ -628,6 +628,12 @@ private ContainerAllocation assignContainer(Resource clusterResource, } } + ActivitiesLogger.APP.recordAppActivityWithoutAllocation( + activitiesManager, node, application, schedulerKey, + ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE + + getResourceDiagnostics(capability, availableForDC), + rmContainer == null ? + ActivityState.RESERVED : ActivityState.RE_RESERVED); ContainerAllocation result = new ContainerAllocation(null, pendingAsk.getPerAllocationResource(), AllocationState.RESERVED); result.containerNodeType = type; @@ -824,7 +830,7 @@ private ContainerAllocation allocate(Resource clusterResource, ActivityDiagnosticConstant. APPLICATION_PRIORITY_DO_NOT_NEED_RESOURCE); return new ContainerAllocation(reservedContainer, null, - AllocationState.QUEUE_SKIPPED); + AllocationState.PRIORITY_SKIPPED); } result = ContainerAllocation.PRIORITY_SKIPPED; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAllocationInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAllocationInfo.java index da2be57184..6b0d86ba92 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAllocationInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAllocationInfo.java @@ -43,6 +43,7 @@ public class AppAllocationInfo { private long timestamp; private String dateTime; private String allocationState; + private String diagnostic; private List requestAllocation; AppAllocationInfo() { @@ -57,6 +58,7 @@ public class AppAllocationInfo { this.timestamp = allocation.getTime(); this.dateTime = new Date(allocation.getTime()).toString(); this.allocationState = allocation.getAppState().name(); + this.diagnostic = allocation.getDiagnostic(); Map> requestToActivityNodes = allocation.getAllocationAttempts().stream().collect(Collectors .groupingBy((e) -> e.getRequestPriority() + "_" + e @@ -96,4 +98,8 @@ public String getAllocationState() { public List getRequestAllocation() { return requestAllocation; } + + public String getDiagnostic() { + return diagnostic; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivities.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivities.java index 7650f7acf1..1e08f05e13 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivities.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivities.java @@ -460,7 +460,7 @@ public void testAppActivityJSON() throws Exception { //Check app activities verifyNumberOfAllocations(json, 1); JSONObject allocations = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocations, "allocationState", "ACCEPTED"); + verifyStateOfAllocations(allocations, "allocationState", "ALLOCATED"); //Check request allocation JSONObject requestAllocationObj = allocations.getJSONObject("requestAllocation"); @@ -527,7 +527,7 @@ public void testAppAssignMultipleContainersPerNodeHeartbeat() JSONArray allocations = json.getJSONArray("allocations"); for (int i = 0; i < allocations.length(); i++) { verifyStateOfAllocations(allocations.getJSONObject(i), - "allocationState", "ACCEPTED"); + "allocationState", "ALLOCATED"); } } finally { rm.stop(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled.java index 525925bb4d..8383a0d28c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled.java @@ -277,7 +277,7 @@ public void testAppAssignContainer() throws Exception { verifyNumberOfAllocations(json, 1); JSONObject allocationObj = json.getJSONObject("allocations"); - verifyStateOfAllocations(allocationObj, "allocationState", "ACCEPTED"); + verifyStateOfAllocations(allocationObj, "allocationState", "ALLOCATED"); JSONObject requestAllocationObj = allocationObj.getJSONObject("requestAllocation"); verifyNumberOfAllocationAttempts(requestAllocationObj, 2); @@ -437,7 +437,7 @@ public void testAppInsufficientResourceDiagnostic() throws Exception { } // check second activity is for first allocation with ALLOCATED state allocationObj = allocationArray.getJSONObject(1); - verifyStateOfAllocations(allocationObj, "allocationState", "ACCEPTED"); + verifyStateOfAllocations(allocationObj, "allocationState", "ALLOCATED"); requestAllocationObj = allocationObj.getJSONObject("requestAllocation"); verifyNumberOfAllocationAttempts(requestAllocationObj, 1); verifyStateOfAllocations(requestAllocationObj, "allocationState", @@ -610,7 +610,7 @@ public void testAppGroupByDiagnostics() throws Exception { } // check second activity is for first allocation with ALLOCATED state allocationObj = allocationArray.getJSONObject(1); - verifyStateOfAllocations(allocationObj, "allocationState", "ACCEPTED"); + verifyStateOfAllocations(allocationObj, "allocationState", "ALLOCATED"); requestAllocationObj = allocationObj.getJSONObject("requestAllocation"); verifyNumberOfAllocationAttempts(requestAllocationObj, 1); verifyStateOfAllocations(requestAllocationObj, "allocationState",