From 1cb2eb0df30d4fbaa090c68022833063f3d225cc Mon Sep 17 00:00:00 2001 From: Inigo Goiri Date: Mon, 20 May 2019 17:22:58 -0700 Subject: [PATCH] HDFS-14353. Erasure Coding: metrics xmitsInProgress become to negative. Contributed by maobaolong. --- .../server/datanode/erasurecode/ErasureCodingWorker.java | 4 ++++ .../datanode/erasurecode/StripedBlockReconstructor.java | 6 +++++- .../server/datanode/erasurecode/StripedReconstructor.java | 4 ++++ .../org/apache/hadoop/hdfs/TestReconstructStripedFile.java | 6 ++++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java index f9063b7a89..f4506cf470 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java @@ -170,4 +170,8 @@ public void shutDown() { stripedReconstructionPool.shutdown(); stripedReadPool.shutdown(); } + + public float getXmitWeight() { + return xmitWeight; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java index 29c0078e95..1af2380886 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedBlockReconstructor.java @@ -67,7 +67,11 @@ public void run() { LOG.warn("Failed to reconstruct striped block: {}", getBlockGroup(), e); getDatanode().getMetrics().incrECFailedReconstructionTasks(); } finally { - getDatanode().decrementXmitsInProgress(getXmits()); + float xmitWeight = getErasureCodingWorker().getXmitWeight(); + // if the xmits is smaller than 1, the xmitsSubmitted should be set to 1 + // because if it set to zero, we cannot to measure the xmits submitted + int xmitsSubmitted = Math.max((int) (getXmits() * xmitWeight), 1); + getDatanode().decrementXmitsInProgress(xmitsSubmitted); final DataNodeMetrics metrics = getDatanode().getMetrics(); metrics.incrECReconstructionTasks(); metrics.incrECReconstructionBytesRead(getBytesRead()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructor.java index a1f4c7ff55..4c8be827f4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/StripedReconstructor.java @@ -275,4 +275,8 @@ Configuration getConf() { DataNode getDatanode() { return datanode; } + + public ErasureCodingWorker getErasureCodingWorker() { + return erasureCodingWorker; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java index 2abfff7876..0b490b5c3b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReconstructStripedFile.java @@ -514,6 +514,8 @@ private void testNNSendsErasureCodingTasks(int deadDN) throws Exception { @Test(timeout = 180000) public void testErasureCodingWorkerXmitsWeight() throws Exception { + testErasureCodingWorkerXmitsWeight(0.5f, + (int) (ecPolicy.getNumDataUnits() * 0.5f)); testErasureCodingWorkerXmitsWeight(1f, ecPolicy.getNumDataUnits()); testErasureCodingWorkerXmitsWeight(0f, 1); testErasureCodingWorkerXmitsWeight(10f, 10 * ecPolicy.getNumDataUnits()); @@ -567,6 +569,10 @@ public void stripedBlockReconstruction() throws IOException { } finally { barrier.await(); DataNodeFaultInjector.set(oldInjector); + for (final DataNode curDn : cluster.getDataNodes()) { + GenericTestUtils.waitFor(() -> curDn.getXceiverCount() > 1, 10, 60000); + assertEquals(0, curDn.getXmitsInProgress()); + } } } }