HDFS-15644. Failed volumes can cause DNs to stop block reporting. Contributed by Ahmed Hussein.

This commit is contained in:
Wei-Chiu Chuang 2020-10-23 12:05:53 -07:00
parent e7aa4da0a2
commit 74634eb002

View File

@@ -1957,28 +1957,32 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
continue; continue;
} }
String volStorageID = b.getVolume().getStorageID(); String volStorageID = b.getVolume().getStorageID();
if (!builders.containsKey(volStorageID)) { switch(b.getState()) {
case FINALIZED:
case RBW:
case RWR:
break;
case RUR:
// use the original replica.
b = b.getOriginalReplica();
break;
case TEMPORARY:
continue;
default:
assert false : "Illegal ReplicaInfo state.";
continue;
}
BlockListAsLongs.Builder storageBuilder = builders.get(volStorageID);
// a storage in the process of failing will not be in the volumes list
// but will be in the replica map.
if (storageBuilder != null) {
storageBuilder.add(b);
} else {
if (!missingVolumesReported.contains(volStorageID)) { if (!missingVolumesReported.contains(volStorageID)) {
LOG.warn("Storage volume: " + volStorageID + " missing for the" LOG.warn("Storage volume: " + volStorageID + " missing for the"
+ " replica block: " + b + ". Probably being removed!"); + " replica block: " + b + ". Probably being removed!");
missingVolumesReported.add(volStorageID); missingVolumesReported.add(volStorageID);
} }
continue;
}
switch(b.getState()) {
case FINALIZED:
case RBW:
case RWR:
builders.get(volStorageID).add(b);
break;
case RUR:
ReplicaInfo orig = b.getOriginalReplica();
builders.get(volStorageID).add(orig);
break;
case TEMPORARY:
break;
default:
assert false : "Illegal ReplicaInfo state.";
} }
} }
} }