diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GcTimeMonitor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GcTimeMonitor.java
index 0640fc01e2..4247eb7050 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GcTimeMonitor.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GcTimeMonitor.java
@@ -23,6 +23,7 @@ import com.google.common.base.Preconditions;
import java.lang.management.GarbageCollectorMXBean;
import java.lang.management.ManagementFactory;
import java.util.List;
+import java.util.concurrent.TimeUnit;
/**
* This class monitors the percentage of time the JVM is paused in GC within
@@ -46,6 +47,52 @@ public class GcTimeMonitor extends Thread {
private final GcData curData = new GcData();
private volatile boolean shouldRun = true;
+ /**
+ * Fluent builder for {@link GcTimeMonitor}. Options that are not set keep
+ * their defaults: a 1-minute observation window, a 5-second sleep interval,
+ * a max GC time percentage of 100 and no alert handler (null). The builder
+ * itself does not validate the supplied values.
+ */
+ public static class Builder {
+
+ private long observationWindowMs = TimeUnit.MINUTES.toMillis(1);
+ private long sleepIntervalMs = TimeUnit.SECONDS.toMillis(5);
+ private int maxGcTimePercentage = 100;
+ private GcTimeAlertHandler handler = null;
+
+ /**
+ * Set observation window size in milliseconds.
+ *
+ * @param value the observation window size in milliseconds
+ * @return this builder, for call chaining
+ */
+ public Builder observationWindowMs(long value) {
+ this.observationWindowMs = value;
+ return this;
+ }
+
+ /**
+ * Set sleep interval in milliseconds.
+ *
+ * @param value the sleep interval in milliseconds
+ * @return this builder, for call chaining
+ */
+ public Builder sleepIntervalMs(long value) {
+ this.sleepIntervalMs = value;
+ return this;
+ }
+
+ /**
+ * Set the max GC time percentage that triggers the alert handler.
+ *
+ * @param value the max GC time percentage
+ * @return this builder, for call chaining
+ */
+ public Builder maxGcTimePercentage(int value) {
+ this.maxGcTimePercentage = value;
+ return this;
+ }
+
+ /**
+ * Set the GC alert handler.
+ *
+ * @param value the alert handler; the builder default is null
+ * @return this builder, for call chaining
+ */
+ public Builder gcTimeAlertHandler(GcTimeAlertHandler value) {
+ this.handler = value;
+ return this;
+ }
+
+ /**
+ * Create a GcTimeMonitor from the values currently held by this builder.
+ *
+ * @return a new GcTimeMonitor instance
+ */
+ public GcTimeMonitor build() {
+ return new GcTimeMonitor(observationWindowMs, sleepIntervalMs,
+ maxGcTimePercentage, handler);
+ }
+ }
+
+
/**
* Create an instance of GCTimeMonitor. Once it's started, it will stay alive
* and monitor GC time percentage until shutdown() is called. If you don't
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
index 2d0f23293b..bafdfddf16 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md
@@ -56,6 +56,7 @@ Each metrics record contains tags such as ProcessName, SessionID and Hostname as
| `GcNumWarnThresholdExceeded` | Number of times that the GC warn threshold is exceeded |
| `GcNumInfoThresholdExceeded` | Number of times that the GC info threshold is exceeded |
| `GcTotalExtraSleepTime` | Total GC extra sleep time in msec |
+| `GcTimePercentage` | The percentage (0..100) of time that the JVM spent in GC pauses within the observation window if `dfs.namenode.gc.time.monitor.enable` is set to true. Use `dfs.namenode.gc.time.monitor.sleep.interval.ms` to specify the sleep interval and `dfs.namenode.gc.time.monitor.observation.window.ms` to specify the observation window. Both are time durations (for example `5s` or `1m`); a value without a unit suffix is interpreted as msec. |
rpc context
===========
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index 459b9f8e35..bb8039c2d9 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -1069,6 +1069,21 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_NAMENODE_BLOCKPLACEMENTPOLICY_DEFAULT_PREFER_LOCAL_NODE_KEY =
"dfs.namenode.block-placement-policy.default.prefer-local-node";
public static final boolean DFS_NAMENODE_BLOCKPLACEMENTPOLICY_DEFAULT_PREFER_LOCAL_NODE_DEFAULT = true;
+ // GcTimeMonitor settings for the NameNode: on/off switch, observation
+ // window and sleep interval. Defaults: enabled, 1 minute and 5 seconds
+ // (the two durations are expressed in milliseconds).
+ public static final String DFS_NAMENODE_GC_TIME_MONITOR_ENABLE =
+ "dfs.namenode.gc.time.monitor.enable";
+ public static final boolean DFS_NAMENODE_GC_TIME_MONITOR_ENABLE_DEFAULT =
+ true;
+ public static final String
+ DFS_NAMENODE_GC_TIME_MONITOR_OBSERVATION_WINDOW_MS =
+ "dfs.namenode.gc.time.monitor.observation.window.ms";
+ public static final long
+ DFS_NAMENODE_GC_TIME_MONITOR_OBSERVATION_WINDOW_MS_DEFAULT =
+ TimeUnit.MINUTES.toMillis(1);
+ public static final String DFS_NAMENODE_GC_TIME_MONITOR_SLEEP_INTERVAL_MS =
+ "dfs.namenode.gc.time.monitor.sleep.interval.ms";
+ public static final long
+ DFS_NAMENODE_GC_TIME_MONITOR_SLEEP_INTERVAL_MS_DEFAULT =
+ TimeUnit.SECONDS.toMillis(5);
public static final String DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY = "dfs.block.local-path-access.user";
public static final String DFS_DOMAIN_SOCKET_PATH_KEY =
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
index 66c5de6c48..2a74190995 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
@@ -96,6 +96,8 @@ import org.apache.hadoop.util.JvmPauseMonitor;
import org.apache.hadoop.util.ServicePlugin;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
+import org.apache.hadoop.util.GcTimeMonitor;
+import org.apache.hadoop.util.GcTimeMonitor.Builder;
import org.apache.htrace.core.Tracer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -176,6 +178,12 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_STRE
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_STREAMS_HARD_LIMIT_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_GC_TIME_MONITOR_SLEEP_INTERVAL_MS;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_GC_TIME_MONITOR_SLEEP_INTERVAL_MS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_GC_TIME_MONITOR_OBSERVATION_WINDOW_MS;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_GC_TIME_MONITOR_OBSERVATION_WINDOW_MS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_GC_TIME_MONITOR_ENABLE;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_GC_TIME_MONITOR_ENABLE_DEFAULT;
import static org.apache.hadoop.util.ExitUtil.terminate;
import static org.apache.hadoop.util.ToolRunner.confirmPrompt;
@@ -411,6 +419,7 @@ public class NameNode extends ReconfigurableBase implements
private NameNodeRpcServer rpcServer;
private JvmPauseMonitor pauseMonitor;
+ private GcTimeMonitor gcTimeMonitor;
private ObjectName nameNodeStatusBeanName;
protected final Tracer tracer;
protected final TracerConfigurationManager tracerConfigurationManager;
@@ -724,6 +733,22 @@ public class NameNode extends ReconfigurableBase implements
pauseMonitor.start();
metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);
+ // When enabled, start the GC time monitor thread and hand it to JvmMetrics.
+ if (conf.getBoolean(DFS_NAMENODE_GC_TIME_MONITOR_ENABLE,
+ DFS_NAMENODE_GC_TIME_MONITOR_ENABLE_DEFAULT)) {
+ // Both settings are read as time durations, converted to milliseconds.
+ long observationWindow = conf.getTimeDuration(
+ DFS_NAMENODE_GC_TIME_MONITOR_OBSERVATION_WINDOW_MS,
+ DFS_NAMENODE_GC_TIME_MONITOR_OBSERVATION_WINDOW_MS_DEFAULT,
+ TimeUnit.MILLISECONDS);
+ long sleepInterval = conf.getTimeDuration(
+ DFS_NAMENODE_GC_TIME_MONITOR_SLEEP_INTERVAL_MS,
+ DFS_NAMENODE_GC_TIME_MONITOR_SLEEP_INTERVAL_MS_DEFAULT,
+ TimeUnit.MILLISECONDS);
+ // Builder here is GcTimeMonitor.Builder; the max GC time percentage and
+ // the alert handler are not set, so the builder defaults apply.
+ gcTimeMonitor = new Builder().observationWindowMs(observationWindow)
+ .sleepIntervalMs(sleepInterval).build();
+ // NOTE(review): no matching shutdown of gcTimeMonitor is visible in this
+ // hunk - confirm the NameNode stop path stops this thread.
+ gcTimeMonitor.start();
+ metrics.getJvmMetrics().setGcTimeMonitor(gcTimeMonitor);
+ }
+
if (NamenodeRole.NAMENODE == role) {
startHttpServer(conf);
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index abb8dec925..ad556c60b6 100755
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -5761,4 +5761,34 @@
Determines the namenode automatic lease recovery interval in seconds.
+
+<property>
+  <name>dfs.namenode.gc.time.monitor.enable</name>
+  <value>true</value>
+  <description>
+    Enable the GcTimePercentage metric in NameNode's JvmMetrics. When enabled,
+    a thread (GcTimeMonitor) is started to compute the metric.
+  </description>
+</property>
+
+<property>
+  <name>dfs.namenode.gc.time.monitor.observation.window.ms</name>
+  <value>1m</value>
+  <description>
+    Determines the window size of GcTimeMonitor. A window is the period of
+    time that starts at now-windowSize and ends at now. The GcTimePercentage
+    is the proportion of the window that was spent in GC.
+  </description>
+</property>
+
+<property>
+  <name>dfs.namenode.gc.time.monitor.sleep.interval.ms</name>
+  <value>5s</value>
+  <description>
+    Determines the sleep interval within the window. The GcTimeMonitor wakes
+    up once every sleep interval to compute the GC time proportion. The
+    shorter the interval, the more precise the GcTimePercentage. The sleep
+    interval must be shorter than the window size.
+  </description>
+</property>
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
index d9cd4cedf0..1eab42a270 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hdfs.client.HdfsAdmin;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HA_HM_RPC_TIMEOUT_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HA_HM_RPC_TIMEOUT_KEY;
+import static org.apache.hadoop.metrics2.source.JvmMetricsInfo.GcTimePercentage;
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt;
import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
@@ -103,6 +104,7 @@ public class TestNameNodeMetrics {
new Path("/testNameNodeMetrics");
private static final String NN_METRICS = "NameNodeActivity";
private static final String NS_METRICS = "FSNamesystem";
+ private static final String JVM_METRICS = "JvmMetrics";
private static final int BLOCK_SIZE = 1024 * 1024;
private static final ErasureCodingPolicy EC_POLICY =
SystemErasureCodingPolicies.getByID(
@@ -223,6 +225,15 @@ public class TestNameNodeMetrics {
capacityTotal);
}
+ /**
+ * Test that the GcTimePercentage gauge can be read from the JvmMetrics
+ * record. The gauge value itself is not asserted; presumably getIntGauge
+ * fails when the gauge is missing (confirm against MetricsAsserts).
+ */
+ @Test
+ public void testGcTimePercentageMetrics() throws Exception {
+ MetricsRecordBuilder rb = getMetrics(JVM_METRICS);
+ MetricsAsserts.getIntGauge(GcTimePercentage.name(), rb);
+ }
+
/** Test metrics indicating the number of stale DataNodes */
@Test
public void testStaleNodes() throws Exception {