From c99c3379282779e11ebda88d845bb89407f2f350 Mon Sep 17 00:00:00 2001 From: Uma Maheswara Rao G Date: Thu, 14 May 2015 11:27:48 +0530 Subject: [PATCH] HDFS-8391. NN should consider current EC tasks handling count from DN while assigning new tasks. Contributed by Uma Maheswara Rao G. --- .../hadoop-hdfs/CHANGES-HDFS-EC-7285.txt | 3 +++ .../hadoop/hdfs/server/datanode/DataNode.java | 19 +++++++++++++++++-- .../erasurecode/ErasureCodingWorker.java | 4 +++- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt index 190ddd64a1..1456434231 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt @@ -212,3 +212,6 @@ HDFS-8364. Erasure coding: fix some minor bugs in EC CLI (Walter Su via vinayakumarb) + + HDFS-8391. NN should consider current EC tasks handling count from DN while + assigning new tasks. (umamahesh) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 5eca2c7e98..a1a80ee3fd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -1909,6 +1909,21 @@ void incrDatanodeNetworkErrors(String host) { int getXmitsInProgress() { return xmitsInProgress.get(); } + + /** + * Increments the xmitsInProgress count. xmitsInProgress count represents the + * number of data replication/reconstruction tasks currently running. 
+ */ + public void incrementXmitsInProgress() { + xmitsInProgress.getAndIncrement(); + } + + /** + * Decrements the xmitsInProgress count. + */ + public void decrementXmitsInProgress() { + xmitsInProgress.getAndDecrement(); + } private void reportBadBlock(final BPOfferService bpos, final ExtendedBlock block, final String msg) { @@ -2128,7 +2143,7 @@ private class DataTransfer implements Runnable { */ @Override public void run() { - xmitsInProgress.getAndIncrement(); + incrementXmitsInProgress(); Socket sock = null; DataOutputStream out = null; DataInputStream in = null; @@ -2207,7 +2222,7 @@ public void run() { // check if there are any disk problem checkDiskErrorAsync(); } finally { - xmitsInProgress.getAndDecrement(); + decrementXmitsInProgress(); IOUtils.closeStream(blockSender); IOUtils.closeStream(out); IOUtils.closeStream(in); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java index eedb1914ce..7b3c24d35a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/erasurecode/ErasureCodingWorker.java @@ -312,6 +312,7 @@ private long getBlockLen(ExtendedBlock blockGroup, int i) { @Override public void run() { + datanode.incrementXmitsInProgress(); try { // Store the indices of successfully read source // This will be updated after doing real read. @@ -397,8 +398,9 @@ public void run() { // Currently we don't check the acks for packets, this is similar as // block replication. 
} catch (Throwable e) { - LOG.warn("Failed to recover striped block: " + blockGroup); + LOG.warn("Failed to recover striped block: " + blockGroup, e); } finally { + datanode.decrementXmitsInProgress(); // close block readers for (StripedReader stripedReader : stripedReaders) { closeBlockReader(stripedReader.blockReader);