HDFS-13339. Volume reference can't be released and may lead to deadlock when DataXceiver does a check volume. Contributed by liaoyuxiangqin and Zsolt Venczel.

This commit is contained in:
Xiao Chen 2018-06-03 22:29:52 -07:00
parent 9c4cbed8d1
commit 9efb4b7db0
2 changed files with 15 additions and 1 deletions

View File

@ -46,6 +46,7 @@
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
@ -103,6 +104,8 @@ public class DatasetVolumeChecker {
private static final VolumeCheckContext IGNORED_CONTEXT =
new VolumeCheckContext();
private final ExecutorService checkVolumeResultHandlerExecutorService;
/**
* @param conf Configuration object.
* @param timer {@link Timer} object used for throttling checks.
@ -163,6 +166,12 @@ public DatasetVolumeChecker(Configuration conf, Timer timer)
.setNameFormat("DataNode DiskChecker thread %d")
.setDaemon(true)
.build()));
checkVolumeResultHandlerExecutorService = Executors.newCachedThreadPool(
new ThreadFactoryBuilder()
.setNameFormat("VolumeCheck ResultHandler thread %d")
.setDaemon(true)
.build());
}
/**
@ -292,7 +301,9 @@ public boolean checkVolume(
numVolumeChecks.incrementAndGet();
Futures.addCallback(olf.get(),
new ResultHandler(volumeReference, new HashSet<>(), new HashSet<>(),
new AtomicLong(1), callback));
new AtomicLong(1), callback),
checkVolumeResultHandlerExecutorService
);
return true;
} else {
IOUtils.cleanup(null, volumeReference);

View File

@ -25,6 +25,7 @@
import org.apache.hadoop.hdfs.server.datanode.StorageLocation;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.*;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi.VolumeCheckContext;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
import org.apache.hadoop.util.FakeTimer;
import org.junit.Rule;
@ -122,6 +123,8 @@ public void call(Set<FsVolumeSpi> healthyVolumes,
}
});
GenericTestUtils.waitFor(() -> numCallbackInvocations.get() > 0, 5, 10000);
// Ensure that the check was invoked at least once.
verify(volume, times(1)).check(anyObject());
if (result) {