HDFS-8486. DN startup may cause severe data loss (Daryn Sharp via Colin P. McCabe)

This commit is contained in:
Colin Patrick Mccabe 2015-06-02 11:40:37 -07:00
parent a2bd6217eb
commit 03fb5c6425
4 changed files with 60 additions and 7 deletions

View File

@ -924,6 +924,9 @@ Release 2.7.1 - UNRELEASED
HDFS-8451. DFSClient probe for encryption testing interprets empty URI HDFS-8451. DFSClient probe for encryption testing interprets empty URI
property for "enabled". (Steve Loughran via xyao) property for "enabled". (Steve Loughran via xyao)
HDFS-8486. DN startup may cause severe data loss (Daryn Sharp via Colin P.
McCabe)
Release 2.7.0 - 2015-04-20 Release 2.7.0 - 2015-04-20
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -1370,9 +1370,9 @@ void initBlockPool(BPOfferService bpos) throws IOException {
// failures. // failures.
checkDiskError(); checkDiskError();
initDirectoryScanner(conf);
data.addBlockPool(nsInfo.getBlockPoolID(), conf); data.addBlockPool(nsInfo.getBlockPoolID(), conf);
blockScanner.enableBlockPoolId(bpos.getBlockPoolId()); blockScanner.enableBlockPoolId(bpos.getBlockPoolId());
initDirectoryScanner(conf);
} }
List<BPOfferService> getAllBpOs() { List<BPOfferService> getAllBpOs() {

View File

@ -550,10 +550,28 @@ ReplicaInfo resolveDuplicateReplicas(
// Leave both block replicas in place. // Leave both block replicas in place.
return replica1; return replica1;
} }
final ReplicaInfo replicaToDelete =
selectReplicaToDelete(replica1, replica2);
final ReplicaInfo replicaToKeep =
(replicaToDelete != replica1) ? replica1 : replica2;
// Update volumeMap and delete the replica
volumeMap.add(bpid, replicaToKeep);
if (replicaToDelete != null) {
deleteReplica(replicaToDelete);
}
return replicaToKeep;
}
static ReplicaInfo selectReplicaToDelete(final ReplicaInfo replica1,
final ReplicaInfo replica2) {
ReplicaInfo replicaToKeep; ReplicaInfo replicaToKeep;
ReplicaInfo replicaToDelete; ReplicaInfo replicaToDelete;
// Both replicas refer to the same block file, so never delete it, even if
// the GS or size differs. The caller should keep the one it just discovered on disk.
if (replica1.getBlockFile().equals(replica2.getBlockFile())) {
return null;
}
if (replica1.getGenerationStamp() != replica2.getGenerationStamp()) { if (replica1.getGenerationStamp() != replica2.getGenerationStamp()) {
replicaToKeep = replica1.getGenerationStamp() > replica2.getGenerationStamp() replicaToKeep = replica1.getGenerationStamp() > replica2.getGenerationStamp()
? replica1 : replica2; ? replica1 : replica2;
@ -573,10 +591,10 @@ ReplicaInfo resolveDuplicateReplicas(
LOG.debug("resolveDuplicateReplicas decide to keep " + replicaToKeep LOG.debug("resolveDuplicateReplicas decide to keep " + replicaToKeep
+ ". Will try to delete " + replicaToDelete); + ". Will try to delete " + replicaToDelete);
} }
return replicaToDelete;
}
// Update volumeMap. private void deleteReplica(final ReplicaInfo replicaToDelete) {
volumeMap.add(bpid, replicaToKeep);
// Delete the files on disk. Failure here is okay. // Delete the files on disk. Failure here is okay.
final File blockFile = replicaToDelete.getBlockFile(); final File blockFile = replicaToDelete.getBlockFile();
if (!blockFile.delete()) { if (!blockFile.delete()) {
@ -586,10 +604,8 @@ ReplicaInfo resolveDuplicateReplicas(
if (!metaFile.delete()) { if (!metaFile.delete()) {
LOG.warn("Failed to delete meta file " + metaFile); LOG.warn("Failed to delete meta file " + metaFile);
} }
return replicaToKeep;
} }
/** /**
* Find out the number of bytes in the block that match its crc. * Find out the number of bytes in the block that match its crc.
* *

View File

@ -51,6 +51,7 @@
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.mockito.Matchers; import org.mockito.Matchers;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock; import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer; import org.mockito.stubbing.Answer;
@ -66,6 +67,8 @@
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.fail; import static org.junit.Assert.fail;
import static org.mockito.Matchers.any; import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyListOf; import static org.mockito.Matchers.anyListOf;
@ -411,4 +414,35 @@ public void testDeletingBlocks() throws IOException {
cluster.shutdown(); cluster.shutdown();
} }
} }
/**
 * Checks which replica {@code BlockPoolSlice.selectReplicaToDelete} picks
 * as the deletion victim when two replicas of the same block id collide.
 *
 * Two situations are covered: both replicas backed by the same block file
 * (nothing may be deleted), and replicas backed by different files on
 * different volumes (exactly one is selected).
 */
@Test
public void testDuplicateReplicaResolution() throws IOException {
  // Two mocked volumes, each paired with its own block file path.
  final FsVolumeImpl volumeA = Mockito.mock(FsVolumeImpl.class);
  final FsVolumeImpl volumeB = Mockito.mock(FsVolumeImpl.class);
  final File fileA = new File("d1/block");
  final File fileB = new File("d2/block");

  // NOTE(review): the numeric constructor arguments are presumably
  // (blockId, length, generationStamp) -- confirm against FinalizedReplica.
  // Replicas that all live in fileA on volumeA.
  final ReplicaInfo sameFileOlder = new FinalizedReplica(1,1,1,volumeA,fileA);
  final ReplicaInfo baseline = new FinalizedReplica(1,2,2,volumeA,fileA);
  final ReplicaInfo sameFileEqual = new FinalizedReplica(1,2,2,volumeA,fileA);
  final ReplicaInfo sameFileNewer = new FinalizedReplica(1,3,3,volumeA,fileA);

  // Replicas with the same numbers, but stored in fileB on volumeB.
  final ReplicaInfo otherFileOlder = new FinalizedReplica(1,1,1,volumeB,fileB);
  final ReplicaInfo otherFileEqual = new FinalizedReplica(1,2,2,volumeB,fileB);
  final ReplicaInfo otherFileNewer = new FinalizedReplica(1,3,3,volumeB,fileB);

  // Same block file on both sides: no victim, whatever the numbers say.
  ReplicaInfo victim =
      BlockPoolSlice.selectReplicaToDelete(sameFileEqual, baseline);
  assertNull(victim);
  victim = BlockPoolSlice.selectReplicaToDelete(sameFileOlder, baseline);
  assertNull(victim);
  victim = BlockPoolSlice.selectReplicaToDelete(sameFileNewer, baseline);
  assertNull(victim);

  // Different block files: on a tie the second argument is the victim ...
  victim = BlockPoolSlice.selectReplicaToDelete(otherFileEqual, baseline);
  assertSame(baseline, victim);
  // ... a first argument with lower numbers is itself the victim ...
  victim = BlockPoolSlice.selectReplicaToDelete(otherFileOlder, baseline);
  assertSame(otherFileOlder, victim);
  // ... and a first argument with higher numbers displaces the baseline.
  victim = BlockPoolSlice.selectReplicaToDelete(otherFileNewer, baseline);
  assertSame(baseline, victim);
}
} }