From 89d1fd5dac4bccf42d82686e146b02eb60d14736 Mon Sep 17 00:00:00 2001 From: Wangda Tan Date: Tue, 19 Jan 2016 21:26:38 +0800 Subject: [PATCH] HADOOP-12356. Fix computing CPU usage statistics on Windows. (Inigo Goiri via wangda) --- .../hadoop-common/CHANGES.txt | 3 ++ .../java/org/apache/hadoop/util/SysInfo.java | 11 +++- .../org/apache/hadoop/util/SysInfoLinux.java | 15 +++++- .../apache/hadoop/util/SysInfoWindows.java | 26 ++++++++-- .../apache/hadoop/util/TestSysInfoLinux.java | 18 +++++-- .../hadoop/util/TestSysInfoWindows.java | 51 +++++++++++++++++-- .../DummyResourceCalculatorPlugin.java | 2 +- .../yarn/util/ResourceCalculatorPlugin.java | 15 ++++-- .../nodemanager/NodeResourceMonitorImpl.java | 4 +- .../monitor/MockResourceCalculatorPlugin.java | 2 +- .../util/TestNodeManagerHardwareUtils.java | 2 +- 11 files changed, 124 insertions(+), 25 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index b13876a11b..bd6550c0c9 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -1622,6 +1622,9 @@ Release 2.8.0 - UNRELEASED HADOOP-12700. Remove unused import in TestCompressorDecompressor.java. (John Zhuge via aajisaka) + HADOOP-12356. Fix computing CPU usage statistics on Windows. + (Inigo Goiri via wangda) + Release 2.7.3 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfo.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfo.java index b75a8d8a15..e8a571489e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfo.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfo.java @@ -104,9 +104,16 @@ public static SysInfo newInstance() { /** * Obtain the CPU usage % of the machine. 
Return -1 if it is unavailable * - * @return CPU usage as a percentage of available cycles. + * @return CPU usage as a percentage (from 0 to 100) of available cycles. */ - public abstract float getCpuUsage(); + public abstract float getCpuUsagePercentage(); + + /** + * Obtain the number of VCores used. Return -1 if it is unavailable. + * + * @return Number of VCores used (from 0 to #VCores). + */ + public abstract float getNumVCoresUsed(); /** * Obtain the aggregated number of bytes read over the network. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java index 6b21f18419..2852bea042 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java @@ -608,7 +608,7 @@ public long getCumulativeCpuTime() { /** {@inheritDoc} */ @Override - public float getCpuUsage() { + public float getCpuUsagePercentage() { readProcStatFile(); float overallCpuUsage = cpuTimeTracker.getCpuTrackerUsagePercent(); if (overallCpuUsage != CpuTimeTracker.UNAVAILABLE) { @@ -617,6 +617,17 @@ public float getCpuUsage() { return overallCpuUsage; } + /** {@inheritDoc} */ + @Override + public float getNumVCoresUsed() { + readProcStatFile(); + float overallVCoresUsage = cpuTimeTracker.getCpuTrackerUsagePercent(); + if (overallVCoresUsage != CpuTimeTracker.UNAVAILABLE) { + overallVCoresUsage = overallVCoresUsage / 100F; + } + return overallVCoresUsage; + } + /** {@inheritDoc} */ @Override public long getNetworkBytesRead() { @@ -676,7 +687,7 @@ public static void main(String[] args) { } catch (InterruptedException e) { // do nothing } - System.out.println("CPU usage % : " + plugin.getCpuUsage()); + System.out.println("CPU usage % : " + plugin.getCpuUsagePercentage()); } @VisibleForTesting diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoWindows.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoWindows.java index 3b009efa8d..b65569b8cd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoWindows.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoWindows.java @@ -104,8 +104,13 @@ void refreshIfNeeded() { cpuFrequencyKhz = Long.parseLong(sysInfo[5]); cumulativeCpuTimeMs = Long.parseLong(sysInfo[6]); if (lastCumCpuTimeMs != -1) { + /** + * This number will be the aggregated usage across all cores in + * [0.0, 100.0 * numProcessors]. For example, it will be 400.0 if + * there are 8 cores and each of them is running at 50% utilization. + */ cpuUsage = (cumulativeCpuTimeMs - lastCumCpuTimeMs) - / (refreshInterval * 1.0f); + * 100F / refreshInterval; } } catch (NumberFormatException nfe) { LOG.warn("Error parsing sysInfo", nfe); @@ -175,9 +180,24 @@ public long getCumulativeCpuTime() { /** {@inheritDoc} */ @Override - public float getCpuUsage() { + public float getCpuUsagePercentage() { refreshIfNeeded(); - return cpuUsage; + float ret = cpuUsage; + if (ret != -1) { + ret = ret / numProcessors; + } + return ret; + } + + /** {@inheritDoc} */ + @Override + public float getNumVCoresUsed() { + refreshIfNeeded(); + float ret = cpuUsage; + if (ret != -1) { + ret = ret / 100F; + } + return ret; } /** {@inheritDoc} */ diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoLinux.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoLinux.java index 83f2e86efc..47fef3307d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoLinux.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoLinux.java @@ -229,7 +229,10 @@ public void parsingProcStatAndCpuFile() throws IOException { 
updateStatFile(uTime, nTime, sTime); assertEquals(plugin.getCumulativeCpuTime(), FAKE_JIFFY_LENGTH * (uTime + nTime + sTime)); - assertEquals(plugin.getCpuUsage(), (float)(CpuTimeTracker.UNAVAILABLE),0.0); + assertEquals(plugin.getCpuUsagePercentage(), + (float)(CpuTimeTracker.UNAVAILABLE),0.0); + assertEquals(plugin.getNumVCoresUsed(), + (float)(CpuTimeTracker.UNAVAILABLE),0.0); // Advance the time and sample again to test the CPU usage calculation uTime += 100L; @@ -237,13 +240,15 @@ public void parsingProcStatAndCpuFile() throws IOException { updateStatFile(uTime, nTime, sTime); assertEquals(plugin.getCumulativeCpuTime(), FAKE_JIFFY_LENGTH * (uTime + nTime + sTime)); - assertEquals(plugin.getCpuUsage(), 6.25F, 0.0); + assertEquals(plugin.getCpuUsagePercentage(), 6.25F, 0.0); + assertEquals(plugin.getNumVCoresUsed(), 0.5F, 0.0); - // Advance the time and sample again. This time, we call getCpuUsage() only. + // Advance the time and sample again. This time, we call getCpuUsagePercentage() only. uTime += 600L; plugin.advanceTime(300L); updateStatFile(uTime, nTime, sTime); - assertEquals(plugin.getCpuUsage(), 25F, 0.0); + assertEquals(plugin.getCpuUsagePercentage(), 25F, 0.0); + assertEquals(plugin.getNumVCoresUsed(), 2F, 0.0); // Advance very short period of time (one jiffy length). // In this case, CPU usage should not be updated. @@ -252,7 +257,10 @@ public void parsingProcStatAndCpuFile() throws IOException { updateStatFile(uTime, nTime, sTime); assertEquals(plugin.getCumulativeCpuTime(), FAKE_JIFFY_LENGTH * (uTime + nTime + sTime)); - assertEquals(plugin.getCpuUsage(), 25F, 0.0); // CPU usage is not updated. + assertEquals( + plugin.getCpuUsagePercentage(), 25F, 0.0); // CPU usage is not updated. + assertEquals( + plugin.getNumVCoresUsed(), 2F, 0.0); // CPU usage is not updated. 
} /** diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoWindows.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoWindows.java index 7924c02a31..2544e7cfbd 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoWindows.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoWindows.java @@ -58,7 +58,10 @@ public void parseSystemInfoString() { assertEquals(2805000L, tester.getCpuFrequency()); assertEquals(6261812L, tester.getCumulativeCpuTime()); // undef on first call - assertEquals(-1.0, tester.getCpuUsage(), 0.0); + assertEquals((float)CpuTimeTracker.UNAVAILABLE, + tester.getCpuUsagePercentage(), 0.0); + assertEquals((float)CpuTimeTracker.UNAVAILABLE, + tester.getNumVCoresUsed(), 0.0); } @Test(timeout = 10000) @@ -70,22 +73,60 @@ public void refreshAndCpuUsage() throws InterruptedException { tester.getAvailablePhysicalMemorySize(); // verify information has been refreshed assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize()); - assertEquals(-1.0, tester.getCpuUsage(), 0.0); + assertEquals((float)CpuTimeTracker.UNAVAILABLE, + tester.getCpuUsagePercentage(), 0.0); + assertEquals((float)CpuTimeTracker.UNAVAILABLE, + tester.getNumVCoresUsed(), 0.0); tester.setSysinfoString( "17177038848,8589467648,15232745472,5400417792,1,2805000,6263012\r\n"); tester.getAvailablePhysicalMemorySize(); // verify information has not been refreshed assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize()); - assertEquals(-1.0, tester.getCpuUsage(), 0.0); + assertEquals((float)CpuTimeTracker.UNAVAILABLE, + tester.getCpuUsagePercentage(), 0.0); + assertEquals((float)CpuTimeTracker.UNAVAILABLE, + tester.getNumVCoresUsed(), 0.0); // advance clock tester.advance(SysInfoWindows.REFRESH_INTERVAL_MS + 1); // verify information has been refreshed assertEquals(5400417792L, 
tester.getAvailablePhysicalMemorySize()); - assertEquals((6263012 - 6261812) / (SysInfoWindows.REFRESH_INTERVAL_MS + 1f), - tester.getCpuUsage(), 0.0); + assertEquals((6263012 - 6261812) * 100F / + (SysInfoWindows.REFRESH_INTERVAL_MS + 1f) / 1, + tester.getCpuUsagePercentage(), 0.0); + assertEquals((6263012 - 6261812) / + (SysInfoWindows.REFRESH_INTERVAL_MS + 1f) / 1, + tester.getNumVCoresUsed(), 0.0); + } + + @Test(timeout = 10000) + public void refreshAndCpuUsageMulticore() throws InterruptedException { + // test with 12 cores + SysInfoWindowsMock tester = new SysInfoWindowsMock(); + tester.setSysinfoString( + "17177038848,8589467648,15232745472,6400417792,12,2805000,6261812\r\n"); + // verify information has been refreshed + assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize()); + + tester.setSysinfoString( + "17177038848,8589467648,15232745472,5400417792,12,2805000,6263012\r\n"); + // verify information has not been refreshed + assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize()); + + // advance clock + tester.advance(SysInfoWindows.REFRESH_INTERVAL_MS + 1); + + // verify information has been refreshed + assertEquals(5400417792L, tester.getAvailablePhysicalMemorySize()); + // verify information has been refreshed + assertEquals((6263012 - 6261812) * 100F / + (SysInfoWindows.REFRESH_INTERVAL_MS + 1f) / 12, + tester.getCpuUsagePercentage(), 0.0); + assertEquals((6263012 - 6261812) / + (SysInfoWindows.REFRESH_INTERVAL_MS + 1f), + tester.getNumVCoresUsed(), 0.0); } @Test(timeout = 10000) diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DummyResourceCalculatorPlugin.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DummyResourceCalculatorPlugin.java index 4999f14d7a..01d9242b0a 100644 --- a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DummyResourceCalculatorPlugin.java +++ 
b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DummyResourceCalculatorPlugin.java @@ -120,7 +120,7 @@ public long getCumulativeCpuTime() { /** {@inheritDoc} */ @Override - public float getCpuUsage() { + public float getCpuUsagePercentage() { return getConf().getFloat(CPU_USAGE, -1); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorPlugin.java index 691c4ac03a..7b2ea5616e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorPlugin.java @@ -120,12 +120,21 @@ public long getCumulativeCpuTime() { } /** - * Obtain the CPU usage % of the machine. Return -1 if it is unavailable + * Obtain the CPU usage % of the machine. Return -1 if it is unavailable. * * @return CPU usage in % */ - public float getCpuUsage() { - return sys.getCpuUsage(); + public float getCpuUsagePercentage() { + return sys.getCpuUsagePercentage(); + } + + /** + * Obtain the number of VCores used. Return -1 if it is unavailable. 
+ * + * @return Number of VCores used (from 0 to #VCores) + */ + public float getNumVCoresUsed() { + return sys.getNumVCoresUsed(); } /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java index b161f14003..3a78d8730d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java @@ -141,12 +141,12 @@ public void run() { long vmem = resourceCalculatorPlugin.getVirtualMemorySize() - resourceCalculatorPlugin.getAvailableVirtualMemorySize(); - float cpu = resourceCalculatorPlugin.getCpuUsage(); + float vcores = resourceCalculatorPlugin.getNumVCoresUsed(); nodeUtilization = ResourceUtilization.newInstance( (int) (pmem >> 20), // B -> MB (int) (vmem >> 20), // B -> MB - cpu); // 1 CPU at 100% is 1 + vcores); // Used Virtual Cores try { Thread.sleep(monitoringInterval); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java index 4a18a8c93e..bbde9ed7c5 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java @@ -63,7 +63,7 @@ public long getCumulativeCpuTime() { } @Override - public float getCpuUsage() { + public float getCpuUsagePercentage() { return 0; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java index 84a045dbb2..4add586bbf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java @@ -73,7 +73,7 @@ public long getCumulativeCpuTime() { } @Override - public float getCpuUsage() { + public float getCpuUsagePercentage() { return 0; }