From 173310e2f57823789543d4725a51f9557527fb64 Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Mon, 19 Oct 2020 10:34:17 +0530 Subject: [PATCH] HDFS-15629. Add seqno when warning slow mirror/disk in BlockReceiver. Contributed by Haibin Huang. --- .../hdfs/server/datanode/BlockReceiver.java | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java index cc9551d1d1..b6970020b9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java @@ -409,7 +409,7 @@ boolean packetSentInTime() { * Flush block data and metadata files to disk. * @throws IOException */ - void flushOrSync(boolean isSync) throws IOException { + void flushOrSync(boolean isSync, long seqno) throws IOException { long flushTotalNanos = 0; long begin = Time.monotonicNow(); if (checksumOut != null) { @@ -448,7 +448,8 @@ void flushOrSync(boolean isSync) throws IOException { LOG.warn("Slow flushOrSync took " + duration + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms), isSync:" + isSync + ", flushTotalNanos=" + flushTotalNanos + "ns, volume=" + getVolumeBaseUri() - + ", blockId=" + replicaInfo.getBlockId()); + + ", blockId=" + replicaInfo.getBlockId() + + ", seqno=" + seqno); } } @@ -606,7 +607,8 @@ private int receivePacket() throws IOException { LOG.warn("Slow BlockReceiver write packet to mirror took " + duration + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms), " + "downstream DNs=" + Arrays.toString(downstreamDNs) - + ", blockId=" + replicaInfo.getBlockId()); + + ", blockId=" + replicaInfo.getBlockId() + + ", seqno=" + seqno); } } catch (IOException e) { handleMirrorOutError(e); @@ -622,7 +624,7 @@ private int receivePacket() throws IOException { } // sync block if requested if (syncBlock) { - flushOrSync(true); + flushOrSync(true, seqno); } } else { final int checksumLen = diskChecksum.getChecksumSize(len); @@ -741,7 +743,8 @@ private int receivePacket() throws IOException { LOG.warn("Slow BlockReceiver write data to disk cost:" + duration + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms), " + "volume=" + getVolumeBaseUri() - + ", blockId=" + replicaInfo.getBlockId()); + + ", blockId=" + replicaInfo.getBlockId() + + ", seqno=" + seqno); } if (duration > maxWriteToDiskMs) { @@ -814,14 +817,14 @@ private int receivePacket() throws IOException { } /// flush entire packet, sync if requested - flushOrSync(syncBlock); + flushOrSync(syncBlock, seqno); replicaInfo.setLastChecksumAndDataLen(offsetInBlock, lastCrc); datanode.metrics.incrBytesWritten(len); datanode.metrics.incrTotalWriteTime(duration); - manageWriterOsCache(offsetInBlock); + manageWriterOsCache(offsetInBlock, seqno); } } catch (IOException iex) { // Volume error check moved to FileIoProvider @@ -887,7 +890,7 @@ private static byte[] copyLastChunkChecksum(byte[] array, int size, int end) { return Arrays.copyOfRange(array, end - size, end); } - private void manageWriterOsCache(long offsetInBlock) { + private void manageWriterOsCache(long offsetInBlock, long seqno) { try { if (streams.getOutFd() != null && offsetInBlock > lastCacheManagementOffset + CACHE_DROP_LAG_BYTES) { @@ -934,7 +937,8 @@ private void manageWriterOsCache(long offsetInBlock) { LOG.warn("Slow manageWriterOsCache took " + duration + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms), volume=" + getVolumeBaseUri() - + ", blockId=" + replicaInfo.getBlockId()); + + ", blockId=" + replicaInfo.getBlockId() + + ", seqno=" + seqno); } } } catch (Throwable t) { @@ -1646,7 +1650,8 @@ private void sendAckUpstreamUnprotected(PipelineAck ack, long seqno, + "ms (threshold=" + datanodeSlowLogThresholdMs + "ms), " + myString + ", replyAck=" + replyAck + ", downstream DNs=" + Arrays.toString(downstreamDNs) - + ", blockId=" + replicaInfo.getBlockId()); + + ", blockId=" + replicaInfo.getBlockId() + + ", seqno=" + seqno); } else if (LOG.isDebugEnabled()) { LOG.debug(myString + ", replyAck=" + replyAck); }