HDFS-12607. [READ] Even one dead datanode with PROVIDED storage results in ProvidedStorageInfo being marked as FAILED

This commit is contained in:
Virajith Jalaparti 2017-11-06 11:05:59 -08:00 committed by Chris Douglas
parent 98f5ed5aa3
commit 71d0a82571
3 changed files with 71 additions and 15 deletions

View File

@ -455,8 +455,10 @@ private void updateStorageStats(StorageReport[] reports, long cacheCapacity,
totalDfsUsed += report.getDfsUsed();
totalNonDfsUsed += report.getNonDfsUsed();
if (StorageType.PROVIDED.equals(
report.getStorage().getStorageType())) {
// for PROVIDED storages, do not call updateStorage() unless
// DatanodeStorageInfo already exists!
if (StorageType.PROVIDED.equals(report.getStorage().getStorageType())
&& storageMap.get(report.getStorage().getStorageID()) == null) {
continue;
}
DatanodeStorageInfo storage = updateStorage(report.getStorage());

View File

@ -66,7 +66,6 @@ public class ProvidedStorageMap {
// limit to a single provider for now
private RwLock lock;
private BlockManager bm;
private boolean hasDNs = false;
private BlockAliasMap aliasMap;
private final String storageId;
@ -123,6 +122,11 @@ DatanodeStorageInfo getStorage(DatanodeDescriptor dn, DatanodeStorage s,
BlockReportContext context) throws IOException {
if (providedEnabled && storageId.equals(s.getStorageID())) {
if (StorageType.PROVIDED.equals(s.getStorageType())) {
if (providedStorageInfo.getState() == State.FAILED
&& s.getState() == State.NORMAL) {
providedStorageInfo.setState(State.NORMAL);
LOG.info("Provided storage transitioning to state " + State.NORMAL);
}
processProvidedStorageReport(context);
dn.injectStorage(providedStorageInfo);
return providedDescriptor.getProvidedStorage(dn, s);
@ -135,21 +139,14 @@ DatanodeStorageInfo getStorage(DatanodeDescriptor dn, DatanodeStorage s,
private void processProvidedStorageReport(BlockReportContext context)
throws IOException {
assert lock.hasWriteLock() : "Not holding write lock";
if (hasDNs) {
return;
}
if (providedStorageInfo.getBlockReportCount() == 0) {
if (providedStorageInfo.getBlockReportCount() == 0
|| providedDescriptor.activeProvidedDatanodes() == 0) {
LOG.info("Calling process first blk report from storage: "
+ providedStorageInfo);
// first pass; periodic refresh should call bm.processReport
bm.processFirstBlockReport(providedStorageInfo,
new ProvidedBlockList(aliasMap.getReader(null).iterator()));
} else {
bm.processReport(providedStorageInfo,
new ProvidedBlockList(aliasMap.getReader(null).iterator()),
context);
}
hasDNs = true;
}
@VisibleForTesting
@ -167,9 +164,10 @@ public LocatedBlockBuilder newLocatedBlocks(int maxValue) {
public void removeDatanode(DatanodeDescriptor dnToRemove) {
if (providedEnabled) {
assert lock.hasWriteLock() : "Not holding write lock";
int remainingDatanodes = providedDescriptor.remove(dnToRemove);
if (remainingDatanodes == 0) {
hasDNs = false;
providedDescriptor.remove(dnToRemove);
// if all datanodes fail, set the block report count to 0
if (providedDescriptor.activeProvidedDatanodes() == 0) {
providedStorageInfo.setBlockReportCount(0);
}
}
}
@ -466,6 +464,22 @@ boolean removeBlock(BlockInfo b) {
return false;
}
}
/**
 * Updates the state of this storage. A transition to
 * {@link State#FAILED} is honoured only when the owning
 * {@link ProvidedDescriptor} reports no active datanodes with PROVIDED
 * storage; every other requested state is applied unconditionally.
 *
 * @param state the state being requested for this storage.
 */
@Override
void setState(DatanodeStorage.State state) {
  // Anything other than FAILED is passed straight through.
  if (state != State.FAILED) {
    super.setState(state);
    return;
  }

  // A FAILED request is ignored while at least one datanode that
  // has PROVIDED storage is still active.
  ProvidedDescriptor providedDn =
      (ProvidedDescriptor) getDatanodeDescriptor();
  if (providedDn.activeProvidedDatanodes() != 0) {
    return;
  }

  LOG.info("Provided storage {} transitioning to state {}",
      this, State.FAILED);
  super.setState(state);
}
}
/**
* Used to emulate block reports for provided blocks.

View File

@ -492,4 +492,44 @@ public void testProvidedDatanodeFailures() throws Exception {
dnInfos[0].getXferAddr());
}
}
/**
 * Checks that the single PROVIDED datanode is still returned as the block
 * location of every file after the NameNode has been told that the
 * datanode is dead and heartbeats have subsequently been triggered.
 *
 * @throws Exception if cluster setup, the client, or an assertion fails.
 */
@Test(timeout=300000)
public void testTransientDeadDatanodes() throws Exception {
  createImage(new FSTreeWalk(NAMEPATH, conf), NNDIRPATH,
      FixedBlockResolver.class);
  // 2 Datanodes, 1 PROVIDED and other DISK
  startCluster(NNDIRPATH, 2, null,
      new StorageType[][] {
          {StorageType.PROVIDED},
          {StorageType.DISK}},
      false);

  DataNode providedDatanode = cluster.getDataNodes().get(0);

  // Close the client even when an assertion below fails, so the test
  // does not leak it.
  try (DFSClient client = new DFSClient(new InetSocketAddress("localhost",
      cluster.getNameNodePort()), cluster.getConfiguration(0))) {

    for (int i = 0; i < numFiles; i++) {
      String filename = "/" + filePrefix + i + fileSuffix;

      DatanodeInfo[] dnInfos =
          getAndCheckBlockLocations(client, filename, 1);
      // location should be the provided DN.
      assertTrue(dnInfos[0].getDatanodeUuid()
          .equals(providedDatanode.getDatanodeUuid()));

      // NameNode thinks the datanode is down
      BlockManagerTestUtil.noticeDeadDatanode(
          cluster.getNameNode(),
          providedDatanode.getDatanodeId().getXferAddr());
      cluster.waitActive();
      cluster.triggerHeartbeats();
      // give the triggered heartbeats time to be processed
      // NOTE(review): fixed sleep; presumably enough for the NameNode to
      // see the datanode again -- confirm it is not a source of flakiness.
      Thread.sleep(1000);

      // the only PROVIDED datanode was dead just transiently, so the
      // block should again be located on that same datanode.
      dnInfos = getAndCheckBlockLocations(client, filename, 1);
      assertTrue(
          dnInfos[0].getDatanodeUuid()
              .equals(providedDatanode.getDatanodeUuid()));
    }
  }
}
}