diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 1e336b7c74..421e5ea613 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -112,6 +112,9 @@ Release 2.7.0 - UNRELEASED
     YARN-2136. Changed RMStateStore to ignore store opearations when fenced.
     (Varun Saxena via jianhe)
 
+    YARN-1156. Enhance NodeManager AllocatedGB and AvailableGB metrics
+    for aggregation of decimal values. (Tsuyoshi OZAWA via junping_du)
+
   OPTIMIZATIONS
 
   BUG FIXES
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
index a3637d5168..beaafe192a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
@@ -47,6 +47,11 @@ public class NodeManagerMetrics {
   @Metric("Container launch duration")
       MutableRate containerLaunchDuration;
 
+  // Exact MB totals backing the GB gauges, so that sub-GB amounts
+  // accumulate instead of being lost to integer division.
+  private long allocatedMB;
+  private long availableMB;
+
   public static NodeManagerMetrics create() {
     return create(DefaultMetricsSystem.instance());
   }
@@ -92,22 +97,29 @@ public void endRunningContainer() {
 
   public void allocateContainer(Resource res) {
     allocatedContainers.incr();
-    allocatedGB.incr(res.getMemory() / 1024);
-    availableGB.decr(res.getMemory() / 1024);
+    allocatedMB = allocatedMB + res.getMemory();
+    allocatedGB.set((int)Math.ceil(allocatedMB/1024d));
+    availableMB = availableMB - res.getMemory();
+    availableGB.set((int)Math.floor(availableMB/1024d));
     allocatedVCores.incr(res.getVirtualCores());
     availableVCores.decr(res.getVirtualCores());
   }
 
   public void releaseContainer(Resource res) {
     allocatedContainers.decr();
-    allocatedGB.decr(res.getMemory() / 1024);
-    availableGB.incr(res.getMemory() / 1024);
+    allocatedMB = allocatedMB - res.getMemory();
+    allocatedGB.set((int)Math.ceil(allocatedMB/1024d));
+    availableMB = availableMB + res.getMemory();
+    availableGB.set((int)Math.floor(availableMB/1024d));
     allocatedVCores.decr(res.getVirtualCores());
     availableVCores.incr(res.getVirtualCores());
   }
 
   public void addResource(Resource res) {
-    availableGB.incr(res.getMemory() / 1024);
+    availableMB = availableMB + res.getMemory();
+    // availableMB already holds the running total, so publish it with
+    // set(); incr() would add the whole total again on every call.
+    availableGB.set((int)Math.floor(availableMB/1024d));
     availableVCores.incr(res.getVirtualCores());
   }
 
@@ -118,4 +130,5 @@ public void addContainerLaunchDuration(long value) {
   public int getRunningContainers() {
     return containersRunning.value();
   }
+
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java
index 9906e3622d..d2a0691617 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java
@@ -33,13 +33,14 @@ public class TestNodeManagerMetrics {
     total.setMemory(8*GiB);
     total.setVirtualCores(16);
     Resource resource = Records.newRecord(Resource.class);
-    resource.setMemory(1*GiB);
+    resource.setMemory(512); //512MiB
     resource.setVirtualCores(2);
 
 
     metrics.addResource(total);
 
-    for (int i = 5; i-- > 0;) {
+    for (int i = 10; i-- > 0;) {
+      // allocate 10 containers (allocatedGB: 5GiB, availableGB: 3GiB)
       metrics.launchedContainer();
       metrics.allocateContainer(resource);
     }
@@ -48,6 +49,7 @@ public class TestNodeManagerMetrics {
     metrics.endInitingContainer();
     metrics.runningContainer();
     metrics.endRunningContainer();
+    // Releasing 3 containers (allocatedGB: 3.5GiB, availableGB: 4.5GiB)
     metrics.completedContainer();
     metrics.releaseContainer(resource);
 
@@ -61,12 +63,17 @@ public class TestNodeManagerMetrics {
     metrics.runningContainer();
     metrics.addContainerLaunchDuration(1);
 
-    checkMetrics(5, 1, 1, 1, 1, 1, 2, 2, 6, 4, 12);
+    // availableGB is expected to be rounded down,
+    // while allocatedGB is expected to be rounded up.
+    // allocatedGB: 3.5GB allocated memory is shown as 4GB
+    // availableGB: 4.5GB available memory is shown as 4GB
+    checkMetrics(10, 1, 1, 1, 1, 1, 4, 7, 4, 14, 2);
   }
 
   private void checkMetrics(int launched, int completed, int failed, int killed,
-                            int initing, int running, int allocatedGB,
-                            int allocatedContainers, int availableGB, int allocatedVCores, int availableVCores) {
+      int initing, int running, int allocatedGB,
+      int allocatedContainers, int availableGB, int allocatedVCores,
+      int availableVCores) {
     MetricsRecordBuilder rb = getMetrics("NodeManagerMetrics");
     assertCounter("ContainersLaunched", launched, rb);
     assertCounter("ContainersCompleted", completed, rb);