From e66e287efe2b43e710137a628f03c7df3ebdf498 Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Fri, 13 Apr 2018 09:17:34 -0700 Subject: [PATCH] HDFS-13330. ShortCircuitCache#fetchOrCreate never retries. Contributed by Gabor Bota. --- .../hdfs/shortcircuit/ShortCircuitCache.java | 11 +++++--- .../shortcircuit/TestShortCircuitCache.java | 26 +++++++++++++++++++ 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitCache.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitCache.java index b26652b0be..c2f0350bc3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/shortcircuit/ShortCircuitCache.java @@ -664,6 +664,7 @@ private void purge(ShortCircuitReplica replica) { unref(replica); } + static final int FETCH_OR_CREATE_RETRY_TIMES = 3; /** * Fetch or create a replica. * @@ -678,11 +679,11 @@ private void purge(ShortCircuitReplica replica) { */ public ShortCircuitReplicaInfo fetchOrCreate(ExtendedBlockId key, ShortCircuitReplicaCreator creator) { - Waitable newWaitable = null; + Waitable newWaitable; lock.lock(); try { ShortCircuitReplicaInfo info = null; - do { + for (int i = 0; i < FETCH_OR_CREATE_RETRY_TIMES; i++){ if (closed) { LOG.trace("{}: can't fethchOrCreate {} because the cache is closed.", this, key); @@ -692,11 +693,12 @@ public ShortCircuitReplicaInfo fetchOrCreate(ExtendedBlockId key, if (waitable != null) { try { info = fetch(key, waitable); + break; } catch (RetriableException e) { LOG.debug("{}: retrying {}", this, e.getMessage()); } } - } while (false); + } if (info != null) return info; // We need to load the replica ourselves. newWaitable = new Waitable<>(lock.newCondition()); @@ -717,7 +719,8 @@ public ShortCircuitReplicaInfo fetchOrCreate(ExtendedBlockId key, * * @throws RetriableException If the caller needs to retry. */ - private ShortCircuitReplicaInfo fetch(ExtendedBlockId key, + @VisibleForTesting // ONLY for testing + protected ShortCircuitReplicaInfo fetch(ExtendedBlockId key, Waitable waitable) throws RetriableException { // Another thread is already in the process of loading this // ShortCircuitReplica. So we simply wait for it to complete. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/shortcircuit/TestShortCircuitCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/shortcircuit/TestShortCircuitCache.java index 7ba0edcecc..5da6a25055 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/shortcircuit/TestShortCircuitCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/shortcircuit/TestShortCircuitCache.java @@ -65,6 +65,7 @@ import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.ShmId; import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.Slot; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.ipc.RetriableException; import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.net.unix.TemporarySocketDirectory; import org.apache.hadoop.security.token.SecretManager.InvalidToken; @@ -793,4 +794,29 @@ public void testPreReceiptVerificationDfsClientCanDoScr() throws Exception { cluster.shutdown(); sockDir.close(); } + + @Test + public void testFetchOrCreateRetries() throws Exception { + try(ShortCircuitCache cache = Mockito + .spy(new ShortCircuitCache(10, 10000000, 10, 10000000, 1, 10000, 0))) { + final TestFileDescriptorPair pair = new TestFileDescriptorPair(); + ExtendedBlockId extendedBlockId = new ExtendedBlockId(123, "test_bp1"); + SimpleReplicaCreator sRC = new SimpleReplicaCreator(123, cache, pair); + + // Arrange that fetch will throw RetriableException for any call + Mockito.doThrow(new RetriableException("Retry")).when(cache) + .fetch(Mockito.eq(extendedBlockId), Mockito.any()); + + // Act: calling fetchOrCreate two times + // first call: it will create and put entry to replicaInfoMap + // second call: it will call fetch to get info for entry, and should + // retry 3 times because RetriableException thrown + cache.fetchOrCreate(extendedBlockId, sRC); + cache.fetchOrCreate(extendedBlockId, sRC); + + // Assert that fetchOrCreate retried to fetch at least 3 times + Mockito.verify(cache, Mockito.atLeast(3)) + .fetch(Mockito.eq(extendedBlockId), Mockito.any()); + } + } }