HDFS-13898. [SBN read] Throw retriable exception for getBlockLocations when ObserverNameNode is in safemode. Contributed by Chao Sun.

This commit is contained in:
Erik Krogen 2018-09-21 14:57:52 -07:00 committed by Konstantin V Shvachko
parent 25b63e8da8
commit b74a7dbf88
3 changed files with 84 additions and 2 deletions

View File

@ -93,6 +93,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LI
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY;
import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*;
import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.ACTIVE;
import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.OBSERVER;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo;
import org.apache.hadoop.hdfs.protocol.OpenFilesIterator.OpenFilesType;
@ -463,7 +465,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
/** The namespace tree. */
FSDirectory dir;
private final BlockManager blockManager;
private BlockManager blockManager;
private final SnapshotManager snapshotManager;
private final CacheManager cacheManager;
private final DatanodeStatistics datanodeStatistics;
@ -1966,7 +1968,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
SafeModeException se = newSafemodeException(
"Zero blocklocations for " + srcArg);
if (haEnabled && haContext != null &&
haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
(haContext.getState().getServiceState() == ACTIVE ||
haContext.getState().getServiceState() == OBSERVER)) {
throw new RetriableException(se);
} else {
throw se;
@ -6301,6 +6304,11 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
return blockManager;
}
@VisibleForTesting
public void setBlockManagerForTesting(BlockManager bm) {
this.blockManager = bm;
}
/** @return the FSDirectory. */
@Override
public FSDirectory getFSDirectory() {

View File

@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
import static org.mockito.Mockito.spy;
@ -223,6 +224,12 @@ public class NameNodeAdapter {
return fsnSpy;
}
public static BlockManager spyOnBlockManager(NameNode nn) {
BlockManager bmSpy = Mockito.spy(nn.getNamesystem().getBlockManager());
nn.getNamesystem().setBlockManagerForTesting(bmSpy);
return bmSpy;
}
public static ReentrantReadWriteLock spyOnFsLock(FSNamesystem fsn) {
ReentrantReadWriteLock spy = Mockito.spy(fsn.getFsLockForTests());
fsn.setFsLockForTests(spy);

View File

@ -24,8 +24,15 @@ import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.io.retry.FailoverProxyProvider;
import org.apache.hadoop.io.retry.RetryInvocationHandler;
import org.apache.hadoop.test.GenericTestUtils;
@ -38,9 +45,12 @@ import java.io.File;
import java.io.IOException;
import java.lang.reflect.Proxy;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.TimeUnit;
import static org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY;
@ -48,6 +58,13 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyBoolean;
import static org.mockito.Matchers.anyLong;
import static org.mockito.Matchers.anyShort;
import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doNothing;
// Main unit tests for ObserverNode
public class TestObserverNode {
@ -299,6 +316,56 @@ public class TestObserverNode {
assertEquals(0, rc);
}
/**
* Test the case where Observer should throw RetriableException, just like
* active NN, for certain open() calls where block locations are not
* available. See HDFS-13898 for details.
*/
@Test
public void testObserverNodeSafeModeWithBlockLocations() throws Exception {
setUpCluster(1);
setObserverRead(true);
// Avoid starting DNs for the mini cluster.
BlockManager bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[0]);
doNothing().when(bmSpy)
.verifyReplication(anyString(), anyShort(), anyString());
// Create a new file - the request should go to active.
dfs.createNewFile(testPath);
assertSentTo(0);
rollEditLogAndTail(0);
dfs.open(testPath);
assertSentTo(2);
// Set observer to safe mode.
dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_ENTER);
// Mock block manager for observer to generate some fake blocks which
// will trigger the (retriable) safe mode exception.
final DatanodeInfo[] empty = {};
bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[2]);
doAnswer((invocation) -> {
ExtendedBlock b = new ExtendedBlock("fake-pool", new Block(12345L));
LocatedBlock fakeBlock = new LocatedBlock(b, empty);
List<LocatedBlock> fakeBlocks = new ArrayList<>();
fakeBlocks.add(fakeBlock);
return new LocatedBlocks(0, false, fakeBlocks, null, true, null, null);
}).when(bmSpy).createLocatedBlocks(any(), anyLong(), anyBoolean(),
anyLong(), anyLong(), anyBoolean(), anyBoolean(), any(), any());
// Open the file again - it should throw retriable exception and then
// failover to active.
dfs.open(testPath);
assertSentTo(0);
// Remove safe mode on observer, request should still go to it.
dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
dfs.open(testPath);
assertSentTo(2);
}
// TODO this does not currently work because fetching the service state from
// e.g. the StandbyNameNode also waits for the transaction ID to catch up.
// This is disabled pending HDFS-13872 and HDFS-13749.