HDFS-15798. EC: When a reconstruction task fails, XmitsInProgress of the DN can become a negative number. Contributed by huhaiyang

This commit is contained in:
fermi 2021-02-05 11:01:49 +08:00
parent b99dba38b9
commit 7ef2875c89

View File

@ -121,7 +121,6 @@ private void initializeStripedBlkReconstructionThreadPool(int numThreads) {
public void processErasureCodingTasks(
Collection<BlockECReconstructionInfo> ecTasks) {
for (BlockECReconstructionInfo reconInfo : ecTasks) {
int xmitsSubmitted = 0;
try {
StripedReconstructionInfo stripedReconInfo =
new StripedReconstructionInfo(
@ -134,20 +133,19 @@ public void processErasureCodingTasks(
final StripedBlockReconstructor task =
new StripedBlockReconstructor(this, stripedReconInfo);
if (task.hasValidTargets()) {
stripedReconstructionPool.submit(task);
// See HDFS-12044. We increase xmitsInProgress even if the task is only
// enqueued, so that
// 1) NN will not send more tasks than what DN can execute and
// 2) DN will not throw away reconstruction tasks, and instead keeps
// an unbounded number of tasks in the executor's task queue.
xmitsSubmitted = Math.max((int)(task.getXmits() * xmitWeight), 1);
int xmitsSubmitted = Math.max((int)(task.getXmits() * xmitWeight), 1);
getDatanode().incrementXmitsInProcess(xmitsSubmitted);
stripedReconstructionPool.submit(task);
} else {
LOG.warn("No missing internal block. Skip reconstruction for task:{}",
reconInfo);
}
} catch (Throwable e) {
getDatanode().decrementXmitsInProgress(xmitsSubmitted);
LOG.warn("Failed to reconstruct striped block {}",
reconInfo.getExtendedBlock().getLocalBlock(), e);
}