diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 385a07d154..6e5d3f5f79 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -317,6 +317,9 @@ Trunk (Unreleased)
HDFS-6374. setXAttr should require the user to be the owner of the file
or directory (Charles Lamb via wang)
+ HDFS-6110. Add more slow-action logging in the critical write path
+ (Liang Xie via stack)
+
Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
index 5c7095931a..4d2da799e1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
@@ -276,6 +276,7 @@ public static class Conf {
final int retryTimesForGetLastBlockLength;
final int retryIntervalForGetLastBlockLength;
final long datanodeRestartTimeout;
+ final long dfsclientSlowIoWarningThresholdMs;
final boolean useLegacyBlockReader;
final boolean useLegacyBlockReaderLocal;
@@ -430,6 +431,9 @@ public Conf(Configuration conf) {
datanodeRestartTimeout = conf.getLong(
DFS_CLIENT_DATANODE_RESTART_TIMEOUT_KEY,
DFS_CLIENT_DATANODE_RESTART_TIMEOUT_DEFAULT) * 1000;
+ dfsclientSlowIoWarningThresholdMs = conf.getLong(
+ DFSConfigKeys.DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_KEY,
+ DFSConfigKeys.DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_DEFAULT);
}
public boolean isUseLegacyBlockReaderLocal() {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index e9a435873c..5835f21131 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -645,5 +645,12 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final int DFS_NFS_REGISTRATION_PORT_DEFAULT = 40; // Currently unassigned.
public static final String DFS_NFS_ALLOW_INSECURE_PORTS_KEY = "dfs.nfs.allow.insecure.ports";
public static final boolean DFS_NFS_ALLOW_INSECURE_PORTS_DEFAULT = true;
-
+
+ // Slow I/O warning log threshold settings for DFSClient and DataNode.
+ public static final String DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_KEY =
+ "dfs.client.slow.io.warning.threshold.ms";
+ public static final long DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_DEFAULT = 30000;
+ public static final String DFS_DATANODE_SLOW_IO_WARNING_THRESHOLD_KEY =
+ "dfs.datanode.slow.io.warning.threshold.ms";
+ public static final long DFS_DATANODE_SLOW_IO_WARNING_THRESHOLD_DEFAULT = 300;
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
index e2bc316f02..8ae750d8e8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
@@ -122,6 +122,7 @@ public class DFSOutputStream extends FSOutputSummer
implements Syncable, CanSetDropBehind {
private static final int MAX_PACKETS = 80; // each packet 64K, total 5MB
private final DFSClient dfsClient;
+ private final long dfsclientSlowLogThresholdMs;
private Socket s;
// closed is accessed by different threads under different locks.
private volatile boolean closed = false;
@@ -788,11 +789,19 @@ public void run() {
// process responses from datanodes.
try {
// read an ack from the pipeline
+ long begin = Time.monotonicNow();
ack.readFields(blockReplyStream);
- if (DFSClient.LOG.isDebugEnabled()) {
+ long duration = Time.monotonicNow() - begin;
+ if (duration > dfsclientSlowLogThresholdMs
+ && ack.getSeqno() != Packet.HEART_BEAT_SEQNO) {
+ DFSClient.LOG
+ .warn("Slow ReadProcessor read fields took " + duration
+ + "ms (threshold=" + dfsclientSlowLogThresholdMs + "ms); ack: "
+ + ack + ", targets: " + Arrays.asList(targets));
+ } else if (DFSClient.LOG.isDebugEnabled()) {
DFSClient.LOG.debug("DFSClient " + ack);
}
-
+
long seqno = ack.getSeqno();
// processes response status from datanodes.
for (int i = ack.getNumOfReplies()-1; i >=0 && dfsClient.clientRunning; i--) {
@@ -1570,6 +1579,8 @@ private DFSOutputStream(DFSClient dfsClient, String src, Progressable progress,
}
this.checksum = checksum;
+ this.dfsclientSlowLogThresholdMs =
+ dfsClient.getConf().dfsclientSlowIoWarningThresholdMs;
}
/** Construct a new output stream for creating a file. */
@@ -2001,6 +2012,7 @@ private void waitForAckedSeqno(long seqno) throws IOException {
if (DFSClient.LOG.isDebugEnabled()) {
DFSClient.LOG.debug("Waiting for ack for: " + seqno);
}
+ long begin = Time.monotonicNow();
try {
synchronized (dataQueue) {
while (!closed) {
@@ -2020,6 +2032,11 @@ private void waitForAckedSeqno(long seqno) throws IOException {
checkClosed();
} catch (ClosedChannelException e) {
}
+ long duration = Time.monotonicNow() - begin;
+ if (duration > dfsclientSlowLogThresholdMs) {
+ DFSClient.LOG.warn("Slow waitForAckedSeqno took " + duration
+ + "ms (threshold=" + dfsclientSlowLogThresholdMs + "ms)");
+ }
}
private synchronized void start() {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
index a7643b571e..e36d40ccae 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
@@ -69,7 +69,7 @@ class BlockReceiver implements Closeable {
@VisibleForTesting
static long CACHE_DROP_LAG_BYTES = 8 * 1024 * 1024;
-
+ private final long datanodeSlowLogThresholdMs;
private DataInputStream in = null; // from where data are read
private DataChecksum clientChecksum; // checksum used by client
private DataChecksum diskChecksum; // checksum we write to disk
@@ -140,7 +140,7 @@ class BlockReceiver implements Closeable {
this.isDatanode = clientname.length() == 0;
this.isClient = !this.isDatanode;
this.restartBudget = datanode.getDnConf().restartReplicaExpiry;
-
+ this.datanodeSlowLogThresholdMs = datanode.getDnConf().datanodeSlowIoWarningThresholdMs;
//for datanode, we have
//1: clientName.length() == 0, and
//2: stage == null or PIPELINE_SETUP_CREATE
@@ -335,6 +335,7 @@ public void close() throws IOException {
*/
void flushOrSync(boolean isSync) throws IOException {
long flushTotalNanos = 0;
+ long begin = Time.monotonicNow();
if (checksumOut != null) {
long flushStartNanos = System.nanoTime();
checksumOut.flush();
@@ -363,6 +364,12 @@ void flushOrSync(boolean isSync) throws IOException {
datanode.metrics.incrFsyncCount();
}
}
+ long duration = Time.monotonicNow() - begin;
+ if (duration > datanodeSlowLogThresholdMs) {
+ LOG.warn("Slow flushOrSync took " + duration + "ms (threshold="
+ + datanodeSlowLogThresholdMs + "ms), isSync:" + isSync + ", flushTotalNanos="
+ + flushTotalNanos + "ns");
+ }
}
/**
@@ -488,8 +495,14 @@ private int receivePacket() throws IOException {
//First write the packet to the mirror:
if (mirrorOut != null && !mirrorError) {
try {
+ long begin = Time.monotonicNow();
packetReceiver.mirrorPacketTo(mirrorOut);
mirrorOut.flush();
+ long duration = Time.monotonicNow() - begin;
+ if (duration > datanodeSlowLogThresholdMs) {
+ LOG.warn("Slow BlockReceiver write packet to mirror took " + duration
+ + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms)");
+ }
} catch (IOException e) {
handleMirrorOutError(e);
}
@@ -572,7 +585,13 @@ private int receivePacket() throws IOException {
int numBytesToDisk = (int)(offsetInBlock-onDiskLen);
// Write data to disk.
+ long begin = Time.monotonicNow();
out.write(dataBuf.array(), startByteToDisk, numBytesToDisk);
+ long duration = Time.monotonicNow() - begin;
+ if (duration > datanodeSlowLogThresholdMs) {
+ LOG.warn("Slow BlockReceiver write data to disk cost:" + duration
+ + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms)");
+ }
// If this is a partial chunk, then verify that this is the only
// chunk in the packet. Calculate new crc for this chunk.
@@ -638,6 +657,7 @@ private void manageWriterOsCache(long offsetInBlock) {
try {
if (outFd != null &&
offsetInBlock > lastCacheManagementOffset + CACHE_DROP_LAG_BYTES) {
+ long begin = Time.monotonicNow();
//
// For SYNC_FILE_RANGE_WRITE, we want to sync from
// lastCacheManagementOffset to a position "two windows ago"
@@ -670,6 +690,11 @@ private void manageWriterOsCache(long offsetInBlock) {
NativeIO.POSIX.POSIX_FADV_DONTNEED);
}
lastCacheManagementOffset = offsetInBlock;
+ long duration = Time.monotonicNow() - begin;
+ if (duration > datanodeSlowLogThresholdMs) {
+ LOG.warn("Slow manageWriterOsCache took " + duration
+ + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms)");
+ }
}
} catch (Throwable t) {
LOG.warn("Error managing cache for writer of block " + block, t);
@@ -1299,9 +1324,15 @@ private void sendAckUpstreamUnprotected(PipelineAck ack, long seqno,
replicaInfo.setBytesAcked(offsetInBlock);
}
// send my ack back to upstream datanode
+ long begin = Time.monotonicNow();
replyAck.write(upstreamOut);
upstreamOut.flush();
- if (LOG.isDebugEnabled()) {
+ long duration = Time.monotonicNow() - begin;
+ if (duration > datanodeSlowLogThresholdMs) {
+ LOG.warn("Slow PacketResponder send ack to upstream took " + duration
+ + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms), " + myString
+ + ", replyAck=" + replyAck);
+ } else if (LOG.isDebugEnabled()) {
LOG.debug(myString + ", replyAck=" + replyAck);
}
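
Note: every instrumented call site above repeats the same elapsed-time idiom.
A minimal sketch of that idiom, assuming a commons-logging Log and Hadoop's
monotonic clock (SlowIoLoggingSketch, timedIo, and doIo are hypothetical
names, not part of this patch):

    import org.apache.commons.logging.Log;
    import org.apache.hadoop.util.Time;

    // Hypothetical illustration only; the patch inlines this logic at each
    // call site rather than factoring it into a helper.
    class SlowIoLoggingSketch {
      static void timedIo(Log log, String action, long thresholdMs, Runnable doIo) {
        long begin = Time.monotonicNow(); // monotonic, immune to wall-clock jumps
        doIo.run();                       // e.g. a flush, disk write, or ack read
        long duration = Time.monotonicNow() - begin;
        if (duration > thresholdMs) {
          log.warn("Slow " + action + " took " + duration + "ms (threshold="
              + thresholdMs + "ms)");
        }
      }
    }

Using Time.monotonicNow() rather than System.currentTimeMillis() keeps the
measured duration correct across wall-clock adjustments (e.g. NTP), which
matters when comparing against a threshold.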
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java
index d72c3b692c..27dba1baca 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java
@@ -79,6 +79,8 @@ public class DNConf {
final long deleteReportInterval;
final long initialBlockReportDelay;
final long cacheReportInterval;
+ final long dfsclientSlowIoWarningThresholdMs;
+ final long datanodeSlowIoWarningThresholdMs;
final int writePacketSize;
final String minimumNameNodeVersion;
@@ -129,7 +131,14 @@ public DNConf(Configuration conf) {
DFS_BLOCKREPORT_SPLIT_THRESHOLD_DEFAULT);
this.cacheReportInterval = conf.getLong(DFS_CACHEREPORT_INTERVAL_MSEC_KEY,
DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT);
-
+
+ this.dfsclientSlowIoWarningThresholdMs = conf.getLong(
+ DFSConfigKeys.DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_KEY,
+ DFSConfigKeys.DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_DEFAULT);
+ this.datanodeSlowIoWarningThresholdMs = conf.getLong(
+ DFSConfigKeys.DFS_DATANODE_SLOW_IO_WARNING_THRESHOLD_KEY,
+ DFSConfigKeys.DFS_DATANODE_SLOW_IO_WARNING_THRESHOLD_DEFAULT);
+
long initBRDelay = conf.getLong(
DFS_BLOCKREPORT_INITIAL_DELAY_KEY,
DFS_BLOCKREPORT_INITIAL_DELAY_DEFAULT) * 1000L;
@@ -168,7 +177,7 @@ public DNConf(Configuration conf) {
DFS_DATANODE_RESTART_REPLICA_EXPIRY_KEY,
DFS_DATANODE_RESTART_REPLICA_EXPIRY_DEFAULT) * 1000L;
}
-
+
// We get minimumNameNodeVersion via a method so it can be mocked out in tests.
String getMinimumNameNodeVersion() {
return this.minimumNameNodeVersion;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index e1ca17d564..34b36a5c38 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -1930,4 +1930,22 @@
+
+<property>
+  <name>dfs.client.slow.io.warning.threshold.ms</name>
+  <value>30000</value>
+  <description>The threshold in milliseconds at which we will log a slow
+    I/O warning in a DFSClient. By default, this parameter is set to 30000
+    milliseconds (30 seconds).
+  </description>
+</property>
+
+<property>
+  <name>dfs.datanode.slow.io.warning.threshold.ms</name>
+  <value>300</value>
+  <description>The threshold in milliseconds at which we will log a slow
+    I/O warning in a DataNode. By default, this parameter is set to 300
+    milliseconds.
+  </description>
+</property>
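
Usage sketch (hedged): the new keys can be overridden in hdfs-site.xml as
above, or programmatically; the class name and the 10000/100 values below are
arbitrary examples, not defaults from this patch:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;
    import org.apache.hadoop.hdfs.HdfsConfiguration;

    // Example only: tightening both thresholds so slow I/O surfaces sooner.
    public class SlowIoThresholdExample {
      public static void main(String[] args) {
        Configuration conf = new HdfsConfiguration();
        // Client-side write-path steps slower than 10s get a warning (default 30s).
        conf.setLong(DFSConfigKeys.DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_KEY, 10000L);
        // Datanode-side flushes/writes slower than 100ms get a warning (default 300ms).
        conf.setLong(DFSConfigKeys.DFS_DATANODE_SLOW_IO_WARNING_THRESHOLD_KEY, 100L);
        // ... pass conf to the FileSystem or DataNode being constructed ...
      }
    }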