diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 58491a6b29..13d99691c3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1050,6 +1050,9 @@ Release 2.8.0 - UNRELEASED
HDFS-8778. TestBlockReportRateLimiting#testLeaseExpiration can deadlock.
(Arpit Agarwal)
+ HDFS-8344. NameNode doesn't recover lease for files with missing blocks
+ (raviprak)
+
Release 2.7.2 - UNRELEASED
INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index 0e569f0d56..210d1e5daf 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -440,6 +440,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final long DFS_CACHEREPORT_INTERVAL_MSEC_DEFAULT = 10 * 1000;
public static final String DFS_BLOCK_INVALIDATE_LIMIT_KEY = "dfs.block.invalidate.limit";
public static final int DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT = 1000;
+ public static final String DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS = "dfs.block.uc.max.recovery.attempts";
+ public static final int DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS_DEFAULT = 5;
+
public static final String DFS_DEFAULT_MAX_CORRUPT_FILES_RETURNED_KEY = "dfs.corruptfilesreturned.max";
public static final int DFS_DEFAULT_MAX_CORRUPT_FILES_RETURNED = 500;
/* Maximum number of blocks to process for initializing replication queues */
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java
index 9cd3987a5c..28f1633e92 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hdfs.server.blockmanagement;
import java.io.IOException;
-import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@@ -61,6 +60,11 @@ public abstract class BlockInfoUnderConstruction extends BlockInfo {
*/
protected Block truncateBlock;
+  /** The number of times recovery will be attempted using all known replicas
+   * before giving up and marking the block under construction as missing.
+   */
+  private int recoveryAttemptsBeforeMarkingBlockMissing;
+
/**
* ReplicaUnderConstruction contains information about replicas while
* they are under construction.
@@ -174,6 +178,8 @@ public BlockInfoUnderConstruction(Block blk, short replication,
"BlockInfoUnderConstruction cannot be in COMPLETE state");
this.blockUCState = state;
setExpectedLocations(targets);
+ this.recoveryAttemptsBeforeMarkingBlockMissing =
+ BlockManager.getMaxBlockUCRecoveries();
}
/** Set expected locations. */
@@ -271,7 +277,7 @@ public void initializeBlockRecovery(long recoveryId) {
if (replicas.size() == 0) {
NameNode.blockStateChangeLog.warn("BLOCK* " +
"BlockInfoUnderConstruction.initLeaseRecovery: " +
- "No blocks found, lease removed.");
+ "No replicas found.");
}
boolean allLiveReplicasTriedAsPrimary = true;
for (int i = 0; i < replicas.size(); i++) {
@@ -283,6 +289,11 @@ public void initializeBlockRecovery(long recoveryId) {
}
}
if (allLiveReplicasTriedAsPrimary) {
+      recoveryAttemptsBeforeMarkingBlockMissing--;
+      NameNode.blockStateChangeLog.info("Tried to recover " + this
+          + " using all replicas. Will try "
+          + recoveryAttemptsBeforeMarkingBlockMissing + " more times.");
+
// Just set all the replicas to be chosen whether they are alive or not.
for (int i = 0; i < replicas.size(); i++) {
replicas.get(i).setChosenAsPrimary(false);
@@ -341,6 +352,10 @@ void addReplicaIfNotPresent(DatanodeStorageInfo storage,
replicas.add(new ReplicaUnderConstruction(block, storage, rState));
}
+ public int getNumRecoveryAttemptsLeft() {
+ return recoveryAttemptsBeforeMarkingBlockMissing;
+ }
+
/**
* Convert an under construction block to a complete block.
*
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 7dce2a804b..16dfe451d8 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -17,7 +17,6 @@
*/
package org.apache.hadoop.hdfs.server.blockmanagement;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX;
import static org.apache.hadoop.util.ExitUtil.terminate;
import java.io.IOException;
@@ -274,6 +273,13 @@ public int getPendingDataNodeMessageCount() {
private BlockPlacementPolicy blockplacement;
private final BlockStoragePolicySuite storagePolicySuite;
+  /** The number of times recovery of a block under construction will be
+   * attempted using all known replicas; e.g. with the default of 5 and 3
+   * replicas, each node is tried 5 times (15 retries across all nodes). */
+  private static int maxBlockUCRecoveries =
+      DFSConfigKeys.DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS_DEFAULT;
+  public static int getMaxBlockUCRecoveries() { return maxBlockUCRecoveries; }
+
/** Check whether name system is running before terminating */
private boolean checkNSRunning = true;
@@ -282,6 +288,9 @@ public BlockManager(final Namesystem namesystem, final Configuration conf)
this.namesystem = namesystem;
datanodeManager = new DatanodeManager(this, namesystem, conf);
heartbeatManager = datanodeManager.getHeartbeatManager();
+ maxBlockUCRecoveries = conf.getInt(
+ DFSConfigKeys.DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS,
+ DFSConfigKeys.DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS_DEFAULT);
startupDelayBlockDeletionInMs = conf.getLong(
DFSConfigKeys.DFS_NAMENODE_STARTUP_DELAY_BLOCK_DELETION_SEC_KEY,
@@ -731,7 +740,8 @@ private BlockInfo completeBlock(final BlockCollection bc,
/**
* Force the given block in the given file to be marked as complete,
* regardless of whether enough replicas are present. This is necessary
- * when tailing edit logs as a Standby.
+ * when tailing edit logs as a Standby or when recovering a lease on a file
+ * with missing blocks.
*/
public BlockInfo forceCompleteBlock(final BlockCollection bc,
final BlockInfoUnderConstruction block) throws IOException {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 7c6d6a19f8..becd05a30b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -3287,6 +3287,16 @@ boolean internalReleaseLease(Lease lease, String src, INodesInPath iip,
+ "Removed empty last block and closed file.");
return true;
}
+
+      // If the block's recovery has been attempted enough times, mark the
+      // block complete anyway and recover the lease.
+      if (uc.getNumRecoveryAttemptsLeft() == 0) {
+        blockManager.forceCompleteBlock(pendingFile, uc);
+        finalizeINodeFileUnderConstruction(src, pendingFile,
+            iip.getLatestSnapshotId());
+        return true;
+      }
+
// start recovery of the last block for this file
long blockRecoveryId = nextGenerationStamp(blockIdManager.isLegacyBlock(uc));
lease = reassignLease(lease, src, recoveryLeaseHolder, pendingFile);
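To make the new control flow easier to follow, here is a minimal standalone sketch of the behavior added by the BlockInfoUnderConstruction and FSNamesystem hunks above. The class and method names below are illustrative only, not actual NameNode code: each recovery round that has already tried every live replica as primary decrements a per-block counter, and once that counter reaches zero internalReleaseLease force-completes the block and closes the file instead of scheduling another round.

// Illustrative sketch only; simplified names, not the actual NameNode classes.
public class RecoveryAttemptCountdownSketch {

  /** Stand-in for the counter the patch adds to BlockInfoUnderConstruction. */
  static final class UnderConstructionBlock {
    private int recoveryAttemptsLeft;

    UnderConstructionBlock(int maxRecoveryAttempts) {
      // Seeded from dfs.block.uc.max.recovery.attempts (default 5).
      this.recoveryAttemptsLeft = maxRecoveryAttempts;
    }

    /** Called when a recovery round finds every live replica already tried. */
    void onAllLiveReplicasTriedAsPrimary() {
      recoveryAttemptsLeft--;
    }

    int getNumRecoveryAttemptsLeft() {
      return recoveryAttemptsLeft;
    }
  }

  public static void main(String[] args) {
    UnderConstructionBlock block = new UnderConstructionBlock(5);

    // Simulate repeated lease-recovery rounds in which every replica is dead.
    int round = 0;
    while (block.getNumRecoveryAttemptsLeft() > 0) {
      round++;
      block.onAllLiveReplicasTriedAsPrimary();
      System.out.println("Round " + round + ": "
          + block.getNumRecoveryAttemptsLeft() + " attempts left");
    }

    // At this point internalReleaseLease would force-complete the last block
    // and close the file instead of starting yet another recovery round.
    System.out.println("Counter exhausted after " + round
        + " rounds; the block would now be force-completed.");
  }
}

With the shipped default of dfs.block.uc.max.recovery.attempts = 5, the loop above runs five rounds before the block would be force-completed and the lease recovered.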
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 8cb7d5f019..dee9111361 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -424,6 +424,15 @@
The lifetime of access tokens in minutes.
+<property>
+  <name>dfs.block.uc.max.recovery.attempts</name>
+  <value>5</value>
+  <description>The number of times a block under construction's recovery will be
+  attempted using all known replicas. e.g. if there are 3 replicas, each node
+  will be tried 5 times (for a total of 15 retries across all nodes).
+  </description>
+</property>
+
dfs.datanode.data.dir
file://${hadoop.tmp.dir}/dfs/data
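For reference, a short example of overriding the new key programmatically instead of through hdfs-site.xml, for instance when setting up a MiniDFSCluster in a test. The value 3 below is purely illustrative (the shipped default is 5), and the override has to be in place before the NameNode starts because BlockManager reads the key in its constructor.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class MaxRecoveryAttemptsConfigExample {
  public static void main(String[] args) {
    // Illustrative value; the shipped default is 5.
    Configuration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS, 3);

    // A NameNode (or MiniDFSCluster) started with this conf will give up on
    // an under-construction block after 3 full passes over its replicas.
    System.out.println(DFSConfigKeys.DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS + " = "
        + conf.getInt(DFSConfigKeys.DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS,
            DFSConfigKeys.DFS_BLOCK_UC_MAX_RECOVERY_ATTEMPTS_DEFAULT));
  }
}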
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
index c9f3842d8b..c9448acb11 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery.java
@@ -27,6 +27,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CreateFlag;
+import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -258,4 +259,81 @@ public void testLeaseRecoveryAndAppend() throws Exception {
}
}
}
+
+  /**
+   * Test that when a client dies while writing to a file, and all the
+   * datanodes holding the file's blocks also die before the lease can be
+   * recovered, the file is eventually closed and its lease recovered once
+   * the configured number of recovery attempts (5 by default) is exhausted.
+   * Also check that if a datanode comes back later, the data originally
+   * written is not truncated.
+   * @throws IOException
+   * @throws InterruptedException
+   */
+ @Test
+ public void testLeaseRecoveryWithMissingBlocks()
+ throws IOException, InterruptedException {
+ Configuration conf = new HdfsConfiguration();
+
+ //Start a cluster with 3 datanodes
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
+ cluster.setLeasePeriod(LEASE_PERIOD, LEASE_PERIOD);
+ cluster.waitActive();
+
+ //create a file (with replication 1)
+ Path file = new Path("/testRecoveryFile");
+ DistributedFileSystem dfs = cluster.getFileSystem();
+ FSDataOutputStream out = dfs.create(file, (short) 1);
+
+ //This keeps count of the number of bytes written (AND is also the data we
+ //are writing)
+ long writtenBytes = 0;
+ while (writtenBytes < 2 * 1024 * 1024) {
+ out.writeLong(writtenBytes);
+ writtenBytes += 8;
+ }
+ System.out.println("Written " + writtenBytes + " bytes");
+ out.hsync();
+ System.out.println("hsynced the data");
+
+    //Kill the datanode to which the file was written.
+    DatanodeInfo dn =
+        ((DFSOutputStream) out.getWrappedStream()).getPipeline()[0];
+    DataNodeProperties dnStopped = cluster.stopDataNode(dn.getXferAddr());
+
+ //Wait at most 20 seconds for the lease to be recovered
+ LeaseManager lm = NameNodeAdapter.getLeaseManager(cluster.getNamesystem());
+ int i = 40;
+    while (i-- > 0 && lm.countLease() != 0) {
+ System.out.println("Still got " + lm.countLease() + " lease(s)");
+ Thread.sleep(500);
+ }
+ assertTrue("The lease was not recovered", lm.countLease() == 0);
+ System.out.println("Got " + lm.countLease() + " leases");
+
+ //Make sure we can't read any data because the datanode is dead
+ FSDataInputStream in = dfs.open(file);
+ try {
+ in.readLong();
+ assertTrue("Shouldn't have reached here", false);
+    } catch (BlockMissingException bme) {
+ System.out.println("Correctly got BlockMissingException because datanode"
+ + " is still dead");
+ }
+
+ //Bring the dead datanode back.
+ cluster.restartDataNode(dnStopped);
+ System.out.println("Restart datanode");
+
+ //Make sure we can read all the data back (since we hsync'ed).
+ in = dfs.open(file);
+    long readBytes = 0;
+    while (in.available() != 0) {
+      assertEquals("Didn't read the data we wrote", readBytes, in.readLong());
+      readBytes += 8;
+    }
+    assertEquals("Didn't get all the data", writtenBytes, readBytes);
+    System.out.println("Read back all the " + readBytes + " bytes");
+ }
+
}