HDFS-8920. Erasure Coding: when recovering lost blocks, logs can be too verbose and hurt performance. Contributed by Rui Li
This commit is contained in:
parent
1080c37300
commit
7bff8ca1c8
@ -444,3 +444,6 @@
|
||||
|
||||
HDFS-9091. Erasure Coding: Provide DistributedFilesystem API to
|
||||
getAllErasureCodingPolicies. (Rakesh R via zhz)
|
||||
|
||||
HDFS-8920. Erasure Coding: when recovering lost blocks, logs can be too
|
||||
verbose and hurt performance. (Rui Li via Kai Zheng)
|
@ -1057,9 +1057,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
|
||||
}
|
||||
}
|
||||
if (chosenNode == null) {
|
||||
DFSClient.LOG.warn("No live nodes contain block " + block.getBlock() +
|
||||
" after checking nodes = " + Arrays.toString(nodes) +
|
||||
", ignoredNodes = " + ignoredNodes);
|
||||
reportLostBlock(block, ignoredNodes);
|
||||
return null;
|
||||
}
|
||||
final String dnAddr =
|
||||
@ -1071,6 +1069,17 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
|
||||
return new DNAddrPair(chosenNode, targetAddr, storageType);
|
||||
}
|
||||
|
||||
/**
|
||||
* Warn the user of a lost block
|
||||
*/
|
||||
protected void reportLostBlock(LocatedBlock lostBlock,
|
||||
Collection<DatanodeInfo> ignoredNodes) {
|
||||
DatanodeInfo[] nodes = lostBlock.getLocations();
|
||||
DFSClient.LOG.warn("No live nodes contain block " + lostBlock.getBlock() +
|
||||
" after checking nodes = " + Arrays.toString(nodes) +
|
||||
", ignoredNodes = " + ignoredNodes);
|
||||
}
|
||||
|
||||
private static String getBestNodeDNAddrPairErrorString(
|
||||
DatanodeInfo nodes[], AbstractMap<DatanodeInfo,
|
||||
DatanodeInfo> deadNodes, Collection<DatanodeInfo> ignoredNodes) {
|
||||
|
@ -45,8 +45,11 @@ import java.io.EOFException;
|
||||
import java.io.IOException;
|
||||
import java.io.InterruptedIOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
@ -154,6 +157,17 @@ public class DFSStripedInputStream extends DFSInputStream {
|
||||
private StripeRange curStripeRange;
|
||||
private final CompletionService<Void> readingService;
|
||||
|
||||
/**
|
||||
* When warning the user of a lost block in striping mode, we remember the
|
||||
* dead nodes we've logged. All other striping blocks on these nodes can be
|
||||
* considered lost too, and we don't want to log a warning for each of them.
|
||||
* This is to prevent the log from being too verbose. Refer to HDFS-8920.
|
||||
*
|
||||
* To minimize the overhead, we only store the datanodeUuid in this set.
|
||||
*/
|
||||
private final Set<String> warnedNodes = Collections.newSetFromMap(
|
||||
new ConcurrentHashMap<String, Boolean>());
|
||||
|
||||
DFSStripedInputStream(DFSClient dfsClient, String src,
|
||||
boolean verifyChecksum, ErasureCodingPolicy ecPolicy,
|
||||
LocatedBlocks locatedBlocks) throws IOException {
|
||||
@ -527,6 +541,26 @@ public class DFSStripedInputStream extends DFSInputStream {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void reportLostBlock(LocatedBlock lostBlock,
|
||||
Collection<DatanodeInfo> ignoredNodes) {
|
||||
DatanodeInfo[] nodes = lostBlock.getLocations();
|
||||
if (nodes != null && nodes.length > 0) {
|
||||
List<String> dnUUIDs = new ArrayList<>();
|
||||
for (DatanodeInfo node : nodes) {
|
||||
dnUUIDs.add(node.getDatanodeUuid());
|
||||
}
|
||||
if (!warnedNodes.containsAll(dnUUIDs)) {
|
||||
DFSClient.LOG.warn(Arrays.toString(nodes) + " are unavailable and " +
|
||||
"all striping blocks on them are lost. " +
|
||||
"IgnoredNodes = " + ignoredNodes);
|
||||
warnedNodes.addAll(dnUUIDs);
|
||||
}
|
||||
} else {
|
||||
super.reportLostBlock(lostBlock, ignoredNodes);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The reader for reading a complete {@link AlignedStripe}. Note that an
|
||||
* {@link AlignedStripe} may cross multiple stripes with cellSize width.
|
||||
|
Loading…
x
Reference in New Issue
Block a user