diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
index e9770548a7..b8aae97030 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
@@ -942,6 +942,7 @@ protected void completeFile(ExtendedBlock last) throws IOException {
long localstart = Time.monotonicNow();
final DfsClientConf conf = dfsClient.getConf();
long sleeptime = conf.getBlockWriteLocateFollowingInitialDelayMs();
+ long maxSleepTime = conf.getBlockWriteLocateFollowingMaxDelayMs();
boolean fileComplete = false;
int retries = conf.getNumBlockWriteLocateFollowingRetry();
while (!fileComplete) {
@@ -965,7 +966,7 @@ protected void completeFile(ExtendedBlock last) throws IOException {
}
retries--;
Thread.sleep(sleeptime);
- sleeptime *= 2;
+ sleeptime = calculateDelayForNextRetry(sleeptime, maxSleepTime);
if (Time.monotonicNow() - localstart > 5000) {
DFSClient.LOG.info("Could not complete " + src + " retrying...");
}
@@ -1075,6 +1076,7 @@ static LocatedBlock addBlock(DatanodeInfo[] excludedNodes,
final DfsClientConf conf = dfsClient.getConf();
int retries = conf.getNumBlockWriteLocateFollowingRetry();
long sleeptime = conf.getBlockWriteLocateFollowingInitialDelayMs();
+ long maxSleepTime = conf.getBlockWriteLocateFollowingMaxDelayMs();
long localstart = Time.monotonicNow();
while (true) {
try {
@@ -1106,7 +1108,7 @@ static LocatedBlock addBlock(DatanodeInfo[] excludedNodes,
LOG.warn("NotReplicatedYetException sleeping " + src
+ " retries left " + retries);
Thread.sleep(sleeptime);
- sleeptime *= 2;
+ sleeptime = calculateDelayForNextRetry(sleeptime, maxSleepTime);
} catch (InterruptedException ie) {
LOG.warn("Caught exception", ie);
}
@@ -1117,4 +1119,19 @@ static LocatedBlock addBlock(DatanodeInfo[] excludedNodes,
}
}
}
+
+ /**
+ * Calculates the delay for the next retry.
+ *
+ * The delay is increased exponentially until the maximum delay is reached.
+ *
+ * @param previousDelay delay for the previous retry
+ * @param maxDelay maximum delay
+ * @return twice the previous delay, capped at the maximum delay
+ */
+ private static long calculateDelayForNextRetry(long previousDelay,
+ long maxDelay) {
+ return Math.min(previousDelay * 2, maxDelay);
+ }
}
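
A standalone sketch (not part of the patch) of the delay sequence this capped doubling produces. The initial delay and cap below are illustrative values, not the defaults:

    public class BackoffDemo {
      // Same logic as the patched helper: double the delay, but never
      // exceed the configured maximum.
      static long calculateDelayForNextRetry(long previousDelay,
          long maxDelay) {
        return Math.min(previousDelay * 2, maxDelay);
      }

      public static void main(String[] args) {
        long sleeptime = 400;           // illustrative initial delay (ms)
        final long maxSleepTime = 5000; // illustrative cap (ms)
        for (int retry = 1; retry <= 8; retry++) {
          System.out.println("retry " + retry + ": sleep " + sleeptime + " ms");
          sleeptime = calculateDelayForNextRetry(sleeptime, maxSleepTime);
        }
        // Prints 400, 800, 1600, 3200, 5000, 5000, 5000, 5000; without
        // the cap, the eighth delay would already be 51200 ms.
      }
    }
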
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java
index a8126700d0..b1ce78d304 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java
@@ -322,6 +322,9 @@ interface BlockWrite {
String LOCATEFOLLOWINGBLOCK_INITIAL_DELAY_MS_KEY =
PREFIX + "locateFollowingBlock.initial.delay.ms";
int LOCATEFOLLOWINGBLOCK_INITIAL_DELAY_MS_DEFAULT = 400;
+ String LOCATEFOLLOWINGBLOCK_MAX_DELAY_MS_KEY =
+ PREFIX + "locateFollowingBlock.max.delay.ms";
+ int LOCATEFOLLOWINGBLOCK_MAX_DELAY_MS_DEFAULT = 60000;

interface ReplaceDatanodeOnFailure {
String PREFIX = BlockWrite.PREFIX + "replace-datanode-on-failure.";
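
A minimal sketch (not from the patch) of overriding both knobs from client code before creating a client; the values chosen here are illustrative, not recommendations:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;

    public class ConfigureBackoff {
      public static Configuration withCappedBackoff() {
        Configuration conf = new Configuration();
        // First retry waits 1 second instead of the 400 ms default.
        conf.setInt(HdfsClientConfigKeys.BlockWrite
            .LOCATEFOLLOWINGBLOCK_INITIAL_DELAY_MS_KEY, 1000);
        // No retry ever sleeps longer than 30 seconds (default cap: 60 s).
        conf.setInt(HdfsClientConfigKeys.BlockWrite
            .LOCATEFOLLOWINGBLOCK_MAX_DELAY_MS_KEY, 30000);
        return conf;
      }
    }
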
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/DfsClientConf.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/DfsClientConf.java
index e63e3f53b8..4110f9413d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/DfsClientConf.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/DfsClientConf.java
@@ -122,6 +122,7 @@ public class DfsClientConf {
private final int numBlockWriteRetry;
private final int numBlockWriteLocateFollowingRetry;
private final int blockWriteLocateFollowingInitialDelayMs;
+ private final int blockWriteLocateFollowingMaxDelayMs;
private final long defaultBlockSize;
private final long prefetchSize;
private final short defaultReplication;
@@ -237,6 +238,9 @@ public DfsClientConf(Configuration conf) {
blockWriteLocateFollowingInitialDelayMs = conf.getInt(
BlockWrite.LOCATEFOLLOWINGBLOCK_INITIAL_DELAY_MS_KEY,
BlockWrite.LOCATEFOLLOWINGBLOCK_INITIAL_DELAY_MS_DEFAULT);
+ blockWriteLocateFollowingMaxDelayMs = conf.getInt(
+ BlockWrite.LOCATEFOLLOWINGBLOCK_MAX_DELAY_MS_KEY,
+ BlockWrite.LOCATEFOLLOWINGBLOCK_MAX_DELAY_MS_DEFAULT);
uMask = FsPermission.getUMask(conf);
connectToDnViaHostname = conf.getBoolean(DFS_CLIENT_USE_DN_HOSTNAME,
DFS_CLIENT_USE_DN_HOSTNAME_DEFAULT);
@@ -349,6 +353,10 @@ public int getBlockWriteLocateFollowingInitialDelayMs() {
return blockWriteLocateFollowingInitialDelayMs;
}

+ public int getBlockWriteLocateFollowingMaxDelayMs() {
+ return blockWriteLocateFollowingMaxDelayMs;
+ }
+
/**
* @return the hdfsTimeout
*/
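
To confirm the effective values, the new getter can be read back through DfsClientConf, mirroring the test change below. A hypothetical snippet; note that DfsClientConf is an internal client class, used here purely for illustration:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.client.impl.DfsClientConf;

    public class InspectBackoff {
      public static void main(String[] args) {
        // Client-side view of a default Configuration.
        DfsClientConf clientConf = new DfsClientConf(new Configuration());
        // With no overrides, these print 400 and 60000.
        System.out.println("initial delay ms: "
            + clientConf.getBlockWriteLocateFollowingInitialDelayMs());
        System.out.println("max delay ms: "
            + clientConf.getBlockWriteLocateFollowingMaxDelayMs());
      }
    }
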
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 503ba8f7ea..cb770f512d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -3135,10 +3135,21 @@
  <name>dfs.client.block.write.locateFollowingBlock.initial.delay.ms</name>
  <value>400</value>
  <description>The initial delay (unit is ms) for locateFollowingBlock,
-    the delay time will increase exponentially(double) for each retry.
+    the delay time will increase exponentially (doubling) for each retry
+    until dfs.client.block.write.locateFollowingBlock.max.delay.ms is
+    reached; after that, the delay for each retry will be
+    dfs.client.block.write.locateFollowingBlock.max.delay.ms.
  </description>
</property>

+<property>
+  <name>dfs.client.block.write.locateFollowingBlock.max.delay.ms</name>
+  <value>60000</value>
+  <description>
+    The maximum delay (unit is ms) before retrying locateFollowingBlock.
+  </description>
+</property>
+
<property>
  <name>dfs.ha.zkfc.nn.http.timeout.ms</name>
  <value>20000</value>
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java
index 14d0ee0060..49de91d3e7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java
@@ -1229,27 +1229,54 @@ static void parseMultipleLinearRandomRetry(String expected, String s) {
}
}

+ /**
+ * Tests the default values and explicit configuration of the
+ * locateFollowingBlock retry delays and retry count.
+ *
+ * Configuration values tested:
+ * - dfs.client.block.write.locateFollowingBlock.initial.delay.ms
+ * - dfs.client.block.write.locateFollowingBlock.max.delay.ms
+ * - dfs.client.block.write.locateFollowingBlock.retries
+ */
@Test
- public void testDFSClientConfigurationLocateFollowingBlockInitialDelay()
+ public void testDFSClientConfigurationLocateFollowingBlock()
throws Exception {
- // test if HdfsClientConfigKeys.BlockWrite.LOCATEFOLLOWINGBLOCK_INITIAL_DELAY_KEY
- // is not configured, verify DFSClient uses the default value 400.
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
+ final int initialDelayTestValue = 1000;
+ final int maxDelayTestValue = 35000;
+ final int retryTestValue = 7;
+
+ final int defaultInitialDelay = 400;
+ final int defaultMaxDelay = 60000;
+ final int defaultRetry = 5;
+
try {
cluster.waitActive();
NamenodeProtocols nn = cluster.getNameNodeRpc();
DFSClient client = new DFSClient(null, nn, conf, null);
- assertEquals(client.getConf().
- getBlockWriteLocateFollowingInitialDelayMs(), 400);
+ assertEquals(defaultInitialDelay, client.getConf().
+ getBlockWriteLocateFollowingInitialDelayMs());
+ assertEquals(defaultMaxDelay, client.getConf().
+ getBlockWriteLocateFollowingMaxDelayMs());
+ assertEquals(defaultRetry, client.getConf().
+ getNumBlockWriteLocateFollowingRetry());

- // change HdfsClientConfigKeys.BlockWrite.LOCATEFOLLOWINGBLOCK_INITIAL_DELAY_KEY,
- // verify DFSClient uses the configured value 1000.
conf.setInt(
HdfsClientConfigKeys.BlockWrite.LOCATEFOLLOWINGBLOCK_INITIAL_DELAY_MS_KEY,
- 1000);
+ initialDelayTestValue);
+ conf.setInt(
+ HdfsClientConfigKeys.BlockWrite.LOCATEFOLLOWINGBLOCK_MAX_DELAY_MS_KEY,
+ maxDelayTestValue);
+ conf.setInt(
+ HdfsClientConfigKeys.BlockWrite.LOCATEFOLLOWINGBLOCK_RETRIES_KEY,
+ retryTestValue);
client = new DFSClient(null, nn, conf, null);
- assertEquals(client.getConf().
- getBlockWriteLocateFollowingInitialDelayMs(), 1000);
+ assertEquals(initialDelayTestValue, client.getConf().
+ getBlockWriteLocateFollowingInitialDelayMs());
+ assertEquals(maxDelayTestValue, client.getConf().
+ getBlockWriteLocateFollowingMaxDelayMs());
+ assertEquals(retryTestValue, client.getConf().
+ getNumBlockWriteLocateFollowingRetry());
} finally {
cluster.shutdown();
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tools/TestHdfsConfigFields.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tools/TestHdfsConfigFields.java
index 5cae2fcd77..9fd505fd4a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tools/TestHdfsConfigFields.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tools/TestHdfsConfigFields.java
@@ -43,6 +43,7 @@ public void initializeMemberVariables() {
configurationClasses = new Class[] { HdfsClientConfigKeys.class,
HdfsClientConfigKeys.Failover.class,
HdfsClientConfigKeys.StripedRead.class, DFSConfigKeys.class,
+ HdfsClientConfigKeys.BlockWrite.class,
HdfsClientConfigKeys.BlockWrite.ReplaceDatanodeOnFailure.class };
// Set error modes