HDFS-12607. [READ] Even one dead datanode with PROVIDED storage results in ProvidedStorageInfo being marked as FAILED
This commit is contained in:
parent
98f5ed5aa3
commit
71d0a82571
@ -455,8 +455,10 @@ private void updateStorageStats(StorageReport[] reports, long cacheCapacity,
|
||||
totalDfsUsed += report.getDfsUsed();
|
||||
totalNonDfsUsed += report.getNonDfsUsed();
|
||||
|
||||
if (StorageType.PROVIDED.equals(
|
||||
report.getStorage().getStorageType())) {
|
||||
// for PROVIDED storages, do not call updateStorage() unless
|
||||
// DatanodeStorageInfo already exists!
|
||||
if (StorageType.PROVIDED.equals(report.getStorage().getStorageType())
|
||||
&& storageMap.get(report.getStorage().getStorageID()) == null) {
|
||||
continue;
|
||||
}
|
||||
DatanodeStorageInfo storage = updateStorage(report.getStorage());
|
||||
|
@ -66,7 +66,6 @@ public class ProvidedStorageMap {
|
||||
// limit to a single provider for now
|
||||
private RwLock lock;
|
||||
private BlockManager bm;
|
||||
private boolean hasDNs = false;
|
||||
private BlockAliasMap aliasMap;
|
||||
|
||||
private final String storageId;
|
||||
@ -123,6 +122,11 @@ DatanodeStorageInfo getStorage(DatanodeDescriptor dn, DatanodeStorage s,
|
||||
BlockReportContext context) throws IOException {
|
||||
if (providedEnabled && storageId.equals(s.getStorageID())) {
|
||||
if (StorageType.PROVIDED.equals(s.getStorageType())) {
|
||||
if (providedStorageInfo.getState() == State.FAILED
|
||||
&& s.getState() == State.NORMAL) {
|
||||
providedStorageInfo.setState(State.NORMAL);
|
||||
LOG.info("Provided storage transitioning to state " + State.NORMAL);
|
||||
}
|
||||
processProvidedStorageReport(context);
|
||||
dn.injectStorage(providedStorageInfo);
|
||||
return providedDescriptor.getProvidedStorage(dn, s);
|
||||
@ -135,21 +139,14 @@ DatanodeStorageInfo getStorage(DatanodeDescriptor dn, DatanodeStorage s,
|
||||
private void processProvidedStorageReport(BlockReportContext context)
|
||||
throws IOException {
|
||||
assert lock.hasWriteLock() : "Not holding write lock";
|
||||
if (hasDNs) {
|
||||
return;
|
||||
}
|
||||
if (providedStorageInfo.getBlockReportCount() == 0) {
|
||||
if (providedStorageInfo.getBlockReportCount() == 0
|
||||
|| providedDescriptor.activeProvidedDatanodes() == 0) {
|
||||
LOG.info("Calling process first blk report from storage: "
|
||||
+ providedStorageInfo);
|
||||
// first pass; periodic refresh should call bm.processReport
|
||||
bm.processFirstBlockReport(providedStorageInfo,
|
||||
new ProvidedBlockList(aliasMap.getReader(null).iterator()));
|
||||
} else {
|
||||
bm.processReport(providedStorageInfo,
|
||||
new ProvidedBlockList(aliasMap.getReader(null).iterator()),
|
||||
context);
|
||||
}
|
||||
hasDNs = true;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
@ -167,9 +164,10 @@ public LocatedBlockBuilder newLocatedBlocks(int maxValue) {
|
||||
public void removeDatanode(DatanodeDescriptor dnToRemove) {
|
||||
if (providedEnabled) {
|
||||
assert lock.hasWriteLock() : "Not holding write lock";
|
||||
int remainingDatanodes = providedDescriptor.remove(dnToRemove);
|
||||
if (remainingDatanodes == 0) {
|
||||
hasDNs = false;
|
||||
providedDescriptor.remove(dnToRemove);
|
||||
// if all datanodes fail, set the block report count to 0
|
||||
if (providedDescriptor.activeProvidedDatanodes() == 0) {
|
||||
providedStorageInfo.setBlockReportCount(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -466,6 +464,22 @@ boolean removeBlock(BlockInfo b) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Updates the state of this storage, guarding the transition to
 * {@code State.FAILED}.
 *
 * A request to move to {@code State.FAILED} is applied only when the owning
 * descriptor reports zero active PROVIDED datanodes; otherwise the request
 * is ignored and the current state is kept. Any other requested state is
 * passed straight through to the superclass.
 *
 * @param state the requested storage state
 */
@Override
|
||||
void setState(DatanodeStorage.State state) {
|
||||
if (state == State.FAILED) {
|
||||
// The state should change to FAILED only when there are no active
|
||||
// datanodes with PROVIDED storage.
|
||||
// NOTE(review): the cast below assumes getDatanodeDescriptor() always
|
||||
// returns a ProvidedDescriptor for this storage -- confirm.
|
||||
ProvidedDescriptor dn = (ProvidedDescriptor) getDatanodeDescriptor();
|
||||
if (dn.activeProvidedDatanodes() == 0) {
|
||||
LOG.info("Provided storage {} transitioning to state {}",
|
||||
this, State.FAILED);
|
||||
super.setState(state);
|
||||
}
|
||||
// else: at least one PROVIDED datanode is still active, so the FAILED
|
||||
// request is dropped without changing the state.
|
||||
} else {
|
||||
super.setState(state);
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Used to emulate block reports for provided blocks.
|
||||
|
@ -492,4 +492,44 @@ public void testProvidedDatanodeFailures() throws Exception {
|
||||
dnInfos[0].getXferAddr());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks that block locations for PROVIDED files are still served by the
 * single PROVIDED datanode after the NameNode has been told that datanode
 * is dead and heartbeats are then re-triggered.
 *
 * For every file, the test looks up the block location, forces the NameNode
 * to notice the PROVIDED datanode as dead, triggers heartbeats, and looks up
 * the location again; both lookups must return the PROVIDED datanode.
 *
 * NOTE(review): the DFSClient created here is not closed within this method.
 */
@Test(timeout=300000)
|
||||
public void testTransientDeadDatanodes() throws Exception {
|
||||
createImage(new FSTreeWalk(NAMEPATH, conf), NNDIRPATH,
|
||||
FixedBlockResolver.class);
|
||||
// 2 Datanodes, 1 PROVIDED and other DISK
|
||||
startCluster(NNDIRPATH, 2, null,
|
||||
new StorageType[][] {
|
||||
{StorageType.PROVIDED},
|
||||
{StorageType.DISK}},
|
||||
false);
|
||||
|
||||
// NOTE(review): assumes index 0 is the datanode started with the PROVIDED
|
||||
// storage type (first row of the array above) -- confirm.
|
||||
DataNode providedDatanode = cluster.getDataNodes().get(0);
|
||||
|
||||
DFSClient client = new DFSClient(new InetSocketAddress("localhost",
|
||||
cluster.getNameNodePort()), cluster.getConfiguration(0));
|
||||
|
||||
for (int i= 0; i < numFiles; i++) {
|
||||
String filename = "/" + filePrefix + i + fileSuffix;
|
||||
|
||||
DatanodeInfo[] dnInfos = getAndCheckBlockLocations(client, filename, 1);
|
||||
// location should be the provided DN.
|
||||
assertTrue(dnInfos[0].getDatanodeUuid()
|
||||
.equals(providedDatanode.getDatanodeUuid()));
|
||||
|
||||
// NameNode thinks the datanode is down
|
||||
BlockManagerTestUtil.noticeDeadDatanode(
|
||||
cluster.getNameNode(),
|
||||
providedDatanode.getDatanodeId().getXferAddr());
|
||||
cluster.waitActive();
|
||||
cluster.triggerHeartbeats();
|
||||
// fixed one-second pause after triggering heartbeats, before re-checking
|
||||
Thread.sleep(1000);
|
||||
|
||||
// after heartbeats are re-triggered, the block should again be located
|
||||
// on the same (only) PROVIDED datanode.
|
||||
dnInfos = getAndCheckBlockLocations(client, filename, 1);
|
||||
assertTrue(
|
||||
dnInfos[0].getDatanodeUuid()
|
||||
.equals(providedDatanode.getDatanodeUuid()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user