HDFS-9373. Erasure coding: friendly log information for write operations with some failed streamers. Contributed by Li Bo.
Change-Id: Ie8ab4ae00e9ee0eb03c32a54bea26a3524308038
This commit is contained in:
parent
4e7d32c0db
commit
5104077e1f
@ -27,6 +27,7 @@
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
@ -250,6 +251,8 @@ private void flipDataBuffers() {
|
||||
private ExtendedBlock currentBlockGroup;
|
||||
private final String[] favoredNodes;
|
||||
private final List<StripedDataStreamer> failedStreamers;
|
||||
private final Map<Integer, Integer> corruptBlockCountMap;
|
||||
private int blockGroupIndex;
|
||||
|
||||
/** Construct a new output stream for creating a file. */
|
||||
DFSStripedOutputStream(DFSClient dfsClient, String src, HdfsFileStatus stat,
|
||||
@ -268,6 +271,7 @@ private void flipDataBuffers() {
|
||||
numAllBlocks = numDataBlocks + numParityBlocks;
|
||||
this.favoredNodes = favoredNodes;
|
||||
failedStreamers = new ArrayList<>();
|
||||
corruptBlockCountMap = new LinkedHashMap<>();
|
||||
|
||||
encoder = CodecUtil.createRSRawEncoder(dfsClient.getConfiguration(),
|
||||
numDataBlocks, numParityBlocks);
|
||||
@ -444,6 +448,7 @@ private void allocateNewBlock() throws IOException {
|
||||
}
|
||||
// assign the new block to the current block group
|
||||
currentBlockGroup = lb.getBlock();
|
||||
blockGroupIndex++;
|
||||
|
||||
final LocatedBlock[] blocks = StripedBlockUtil.parseStripedBlockGroup(
|
||||
(LocatedStripedBlock) lb, cellSize, numDataBlocks,
|
||||
@ -590,6 +595,7 @@ private void checkStreamerFailures() throws IOException {
|
||||
while (newFailed.size() > 0) {
|
||||
failedStreamers.addAll(newFailed);
|
||||
coordinator.clearFailureStates();
|
||||
corruptBlockCountMap.put(blockGroupIndex, failedStreamers.size());
|
||||
|
||||
// mark all the healthy streamers as external error
|
||||
Set<StripedDataStreamer> healthySet = markExternalErrorOnStreamers();
|
||||
@ -957,6 +963,7 @@ protected synchronized void closeImpl() throws IOException {
|
||||
dfsClient.getTracer().newScope("completeFile")) {
|
||||
completeFile(currentBlockGroup);
|
||||
}
|
||||
logCorruptBlocks();
|
||||
} catch (ClosedChannelException ignored) {
|
||||
} finally {
|
||||
setClosed();
|
||||
@ -1004,6 +1011,20 @@ static void sleep(long ms, String op) throws InterruptedIOException {
|
||||
}
|
||||
}
|
||||
|
||||
private void logCorruptBlocks() {
|
||||
for (Map.Entry<Integer, Integer> entry : corruptBlockCountMap.entrySet()) {
|
||||
int bgIndex = entry.getKey();
|
||||
int corruptBlockCount = entry.getValue();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("Block group <").append(bgIndex).append("> has ")
|
||||
.append(corruptBlockCount).append(" corrupt blocks.");
|
||||
if (corruptBlockCount == numAllBlocks - numDataBlocks) {
|
||||
sb.append(" It's at high risk of losing data.");
|
||||
}
|
||||
LOG.warn(sb.toString());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
ExtendedBlock getBlock() {
|
||||
return currentBlockGroup;
|
||||
|
@ -870,6 +870,9 @@ Trunk (Unreleased)
|
||||
HDFS-9348. Erasure Coding: DFS GetErasureCodingPolicy API on a non-existent
|
||||
file should be handled properly. (Rakesh R via umamahesh)
|
||||
|
||||
HDFS-9373. Erasure coding: friendly log information for write operations
|
||||
with some failed streamers. (Li Bo via zhz)
|
||||
|
||||
HDFS-9451. Clean up deprecated umasks and related unit tests.
|
||||
(Wei-Chiu Chuang via wheat9)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user