HDFS-12607. [READ] Even one dead datanode with PROVIDED storage results in ProvidedStorageInfo being marked as FAILED
This commit is contained in:
parent
98f5ed5aa3
commit
71d0a82571
@ -455,8 +455,10 @@ private void updateStorageStats(StorageReport[] reports, long cacheCapacity,
|
|||||||
totalDfsUsed += report.getDfsUsed();
|
totalDfsUsed += report.getDfsUsed();
|
||||||
totalNonDfsUsed += report.getNonDfsUsed();
|
totalNonDfsUsed += report.getNonDfsUsed();
|
||||||
|
|
||||||
if (StorageType.PROVIDED.equals(
|
// for PROVIDED storages, do not call updateStorage() unless
|
||||||
report.getStorage().getStorageType())) {
|
// DatanodeStorageInfo already exists!
|
||||||
|
if (StorageType.PROVIDED.equals(report.getStorage().getStorageType())
|
||||||
|
&& storageMap.get(report.getStorage().getStorageID()) == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
DatanodeStorageInfo storage = updateStorage(report.getStorage());
|
DatanodeStorageInfo storage = updateStorage(report.getStorage());
|
||||||
|
@ -66,7 +66,6 @@ public class ProvidedStorageMap {
|
|||||||
// limit to a single provider for now
|
// limit to a single provider for now
|
||||||
private RwLock lock;
|
private RwLock lock;
|
||||||
private BlockManager bm;
|
private BlockManager bm;
|
||||||
private boolean hasDNs = false;
|
|
||||||
private BlockAliasMap aliasMap;
|
private BlockAliasMap aliasMap;
|
||||||
|
|
||||||
private final String storageId;
|
private final String storageId;
|
||||||
@ -123,6 +122,11 @@ DatanodeStorageInfo getStorage(DatanodeDescriptor dn, DatanodeStorage s,
|
|||||||
BlockReportContext context) throws IOException {
|
BlockReportContext context) throws IOException {
|
||||||
if (providedEnabled && storageId.equals(s.getStorageID())) {
|
if (providedEnabled && storageId.equals(s.getStorageID())) {
|
||||||
if (StorageType.PROVIDED.equals(s.getStorageType())) {
|
if (StorageType.PROVIDED.equals(s.getStorageType())) {
|
||||||
|
if (providedStorageInfo.getState() == State.FAILED
|
||||||
|
&& s.getState() == State.NORMAL) {
|
||||||
|
providedStorageInfo.setState(State.NORMAL);
|
||||||
|
LOG.info("Provided storage transitioning to state " + State.NORMAL);
|
||||||
|
}
|
||||||
processProvidedStorageReport(context);
|
processProvidedStorageReport(context);
|
||||||
dn.injectStorage(providedStorageInfo);
|
dn.injectStorage(providedStorageInfo);
|
||||||
return providedDescriptor.getProvidedStorage(dn, s);
|
return providedDescriptor.getProvidedStorage(dn, s);
|
||||||
@ -135,21 +139,14 @@ DatanodeStorageInfo getStorage(DatanodeDescriptor dn, DatanodeStorage s,
|
|||||||
private void processProvidedStorageReport(BlockReportContext context)
|
private void processProvidedStorageReport(BlockReportContext context)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
assert lock.hasWriteLock() : "Not holding write lock";
|
assert lock.hasWriteLock() : "Not holding write lock";
|
||||||
if (hasDNs) {
|
if (providedStorageInfo.getBlockReportCount() == 0
|
||||||
return;
|
|| providedDescriptor.activeProvidedDatanodes() == 0) {
|
||||||
}
|
|
||||||
if (providedStorageInfo.getBlockReportCount() == 0) {
|
|
||||||
LOG.info("Calling process first blk report from storage: "
|
LOG.info("Calling process first blk report from storage: "
|
||||||
+ providedStorageInfo);
|
+ providedStorageInfo);
|
||||||
// first pass; periodic refresh should call bm.processReport
|
// first pass; periodic refresh should call bm.processReport
|
||||||
bm.processFirstBlockReport(providedStorageInfo,
|
bm.processFirstBlockReport(providedStorageInfo,
|
||||||
new ProvidedBlockList(aliasMap.getReader(null).iterator()));
|
new ProvidedBlockList(aliasMap.getReader(null).iterator()));
|
||||||
} else {
|
|
||||||
bm.processReport(providedStorageInfo,
|
|
||||||
new ProvidedBlockList(aliasMap.getReader(null).iterator()),
|
|
||||||
context);
|
|
||||||
}
|
}
|
||||||
hasDNs = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
@ -167,9 +164,10 @@ public LocatedBlockBuilder newLocatedBlocks(int maxValue) {
|
|||||||
public void removeDatanode(DatanodeDescriptor dnToRemove) {
|
public void removeDatanode(DatanodeDescriptor dnToRemove) {
|
||||||
if (providedEnabled) {
|
if (providedEnabled) {
|
||||||
assert lock.hasWriteLock() : "Not holding write lock";
|
assert lock.hasWriteLock() : "Not holding write lock";
|
||||||
int remainingDatanodes = providedDescriptor.remove(dnToRemove);
|
providedDescriptor.remove(dnToRemove);
|
||||||
if (remainingDatanodes == 0) {
|
// if all datanodes fail, set the block report count to 0
|
||||||
hasDNs = false;
|
if (providedDescriptor.activeProvidedDatanodes() == 0) {
|
||||||
|
providedStorageInfo.setBlockReportCount(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -466,6 +464,22 @@ boolean removeBlock(BlockInfo b) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
void setState(DatanodeStorage.State state) {
|
||||||
|
if (state == State.FAILED) {
|
||||||
|
// The state should change to FAILED only when there are no active
|
||||||
|
// datanodes with PROVIDED storage.
|
||||||
|
ProvidedDescriptor dn = (ProvidedDescriptor) getDatanodeDescriptor();
|
||||||
|
if (dn.activeProvidedDatanodes() == 0) {
|
||||||
|
LOG.info("Provided storage {} transitioning to state {}",
|
||||||
|
this, State.FAILED);
|
||||||
|
super.setState(state);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
super.setState(state);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Used to emulate block reports for provided blocks.
|
* Used to emulate block reports for provided blocks.
|
||||||
|
@ -492,4 +492,44 @@ public void testProvidedDatanodeFailures() throws Exception {
|
|||||||
dnInfos[0].getXferAddr());
|
dnInfos[0].getXferAddr());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout=300000)
|
||||||
|
public void testTransientDeadDatanodes() throws Exception {
|
||||||
|
createImage(new FSTreeWalk(NAMEPATH, conf), NNDIRPATH,
|
||||||
|
FixedBlockResolver.class);
|
||||||
|
// 2 Datanodes: one with PROVIDED storage and the other with DISK storage
|
||||||
|
startCluster(NNDIRPATH, 2, null,
|
||||||
|
new StorageType[][] {
|
||||||
|
{StorageType.PROVIDED},
|
||||||
|
{StorageType.DISK}},
|
||||||
|
false);
|
||||||
|
|
||||||
|
DataNode providedDatanode = cluster.getDataNodes().get(0);
|
||||||
|
|
||||||
|
DFSClient client = new DFSClient(new InetSocketAddress("localhost",
|
||||||
|
cluster.getNameNodePort()), cluster.getConfiguration(0));
|
||||||
|
|
||||||
|
for (int i= 0; i < numFiles; i++) {
|
||||||
|
String filename = "/" + filePrefix + i + fileSuffix;
|
||||||
|
|
||||||
|
DatanodeInfo[] dnInfos = getAndCheckBlockLocations(client, filename, 1);
|
||||||
|
// location should be the provided DN.
|
||||||
|
assertTrue(dnInfos[0].getDatanodeUuid()
|
||||||
|
.equals(providedDatanode.getDatanodeUuid()));
|
||||||
|
|
||||||
|
// NameNode thinks the datanode is down
|
||||||
|
BlockManagerTestUtil.noticeDeadDatanode(
|
||||||
|
cluster.getNameNode(),
|
||||||
|
providedDatanode.getDatanodeId().getXferAddr());
|
||||||
|
cluster.waitActive();
|
||||||
|
cluster.triggerHeartbeats();
|
||||||
|
Thread.sleep(1000);
|
||||||
|
|
||||||
|
// the block should still be found on the same (and only) provided datanode.
|
||||||
|
dnInfos = getAndCheckBlockLocations(client, filename, 1);
|
||||||
|
assertTrue(
|
||||||
|
dnInfos[0].getDatanodeUuid()
|
||||||
|
.equals(providedDatanode.getDatanodeUuid()));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user