HDFS-15695. NN should not let the balancer run in safemode (#2489). Contributed by Daryn Sharp and Ahmed Hussein
This commit is contained in:
parent
fa773a8326
commit
2b5b556dd7
@ -663,6 +663,7 @@ public BlocksWithLocations getBlocks(DatanodeInfo datanode, long size, long
|
||||
}
|
||||
checkNNStartup();
|
||||
namesystem.checkSuperuserPrivilege();
|
||||
namesystem.checkNameNodeSafeMode("Cannot execute getBlocks");
|
||||
return namesystem.getBlocks(datanode, size, minBlockSize);
|
||||
}
|
||||
|
||||
|
@ -19,8 +19,8 @@
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
@ -28,37 +28,50 @@
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.BlockLocation;
|
||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.RemoteIterator;
|
||||
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
|
||||
import org.apache.hadoop.hdfs.protocol.Block;
|
||||
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.TestBlockManager;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNode;
|
||||
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||
import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocations;
|
||||
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
|
||||
import org.apache.hadoop.ipc.RemoteException;
|
||||
import org.apache.hadoop.test.LambdaTestUtils;
|
||||
|
||||
import org.junit.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* This class tests if getblocks request works correctly.
|
||||
*/
|
||||
public class TestGetBlocks {
|
||||
private static final int blockSize = 8192;
|
||||
private static final String racks[] = new String[] { "/d1/r1", "/d1/r1",
|
||||
"/d1/r2", "/d1/r2", "/d1/r2", "/d2/r3", "/d2/r3" };
|
||||
private static final int numDatanodes = racks.length;
|
||||
private static final Logger LOG =
|
||||
LoggerFactory.getLogger(TestBlockManager.class);
|
||||
|
||||
private static final int BLOCK_SIZE = 8192;
|
||||
private static final String[] RACKS = new String[]{"/d1/r1", "/d1/r1",
|
||||
"/d1/r2", "/d1/r2", "/d1/r2", "/d2/r3", "/d2/r3"};
|
||||
private static final int NUM_DATA_NODES = RACKS.length;
|
||||
|
||||
/**
|
||||
* Stop the heartbeat of a datanode in the MiniDFSCluster
|
||||
@ -96,7 +109,7 @@ public void testReadSelectNonStaleDatanode() throws Exception {
|
||||
conf.setLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY,
|
||||
staleInterval);
|
||||
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
|
||||
.numDataNodes(numDatanodes).racks(racks).build();
|
||||
.numDataNodes(NUM_DATA_NODES).racks(RACKS).build();
|
||||
|
||||
cluster.waitActive();
|
||||
InetSocketAddress addr = new InetSocketAddress("localhost",
|
||||
@ -105,7 +118,7 @@ public void testReadSelectNonStaleDatanode() throws Exception {
|
||||
List<DatanodeDescriptor> nodeInfoList = cluster.getNameNode()
|
||||
.getNamesystem().getBlockManager().getDatanodeManager()
|
||||
.getDatanodeListForReport(DatanodeReportType.LIVE);
|
||||
assertEquals("Unexpected number of datanodes", numDatanodes,
|
||||
assertEquals("Unexpected number of datanodes", NUM_DATA_NODES,
|
||||
nodeInfoList.size());
|
||||
FileSystem fileSys = cluster.getFileSystem();
|
||||
FSDataOutputStream stm = null;
|
||||
@ -116,14 +129,14 @@ public void testReadSelectNonStaleDatanode() throws Exception {
|
||||
stm = fileSys.create(fileName, true,
|
||||
fileSys.getConf().getInt(
|
||||
CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096),
|
||||
(short) 3, blockSize);
|
||||
stm.write(new byte[(blockSize * 3) / 2]);
|
||||
(short) 3, BLOCK_SIZE);
|
||||
stm.write(new byte[(BLOCK_SIZE * 3) / 2]);
|
||||
// We do not close the stream so that
|
||||
// the writing seems to be still ongoing
|
||||
stm.hflush();
|
||||
|
||||
LocatedBlocks blocks = client.getNamenode().getBlockLocations(
|
||||
fileName.toString(), 0, blockSize);
|
||||
fileName.toString(), 0, BLOCK_SIZE);
|
||||
DatanodeInfo[] nodes = blocks.get(0).getLocations();
|
||||
assertEquals(nodes.length, 3);
|
||||
DataNode staleNode = null;
|
||||
@ -139,7 +152,7 @@ public void testReadSelectNonStaleDatanode() throws Exception {
|
||||
-(staleInterval + 1));
|
||||
|
||||
LocatedBlocks blocksAfterStale = client.getNamenode().getBlockLocations(
|
||||
fileName.toString(), 0, blockSize);
|
||||
fileName.toString(), 0, BLOCK_SIZE);
|
||||
DatanodeInfo[] nodesAfterStale = blocksAfterStale.get(0).getLocations();
|
||||
assertEquals(nodesAfterStale.length, 3);
|
||||
assertEquals(nodesAfterStale[2].getHostName(), nodes[0].getHostName());
|
||||
@ -175,133 +188,128 @@ public void testReadSelectNonStaleDatanode() throws Exception {
|
||||
}
|
||||
}
|
||||
|
||||
/** test getBlocks */
|
||||
/**
|
||||
* Test getBlocks.
|
||||
*/
|
||||
@Test
|
||||
public void testGetBlocks() throws Exception {
|
||||
final Configuration CONF = new HdfsConfiguration();
|
||||
DistributedFileSystem fs = null;
|
||||
Path testFile = null;
|
||||
BlockWithLocations[] locs;
|
||||
final int blkSize = 1024;
|
||||
final String filePath = "/tmp.txt";
|
||||
final int blkLocsSize = 13;
|
||||
long fileLen = 12 * blkSize + 1;
|
||||
final short replicationFactor = (short) 2;
|
||||
final Configuration config = new HdfsConfiguration();
|
||||
|
||||
final short REPLICATION_FACTOR = (short) 2;
|
||||
final int DEFAULT_BLOCK_SIZE = 1024;
|
||||
// set configurations
|
||||
config.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blkSize);
|
||||
config.setLong(DFSConfigKeys.DFS_BALANCER_GETBLOCKS_MIN_BLOCK_SIZE_KEY,
|
||||
blkSize);
|
||||
|
||||
CONF.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DEFAULT_BLOCK_SIZE);
|
||||
CONF.setLong(DFSConfigKeys.DFS_BALANCER_GETBLOCKS_MIN_BLOCK_SIZE_KEY,
|
||||
DEFAULT_BLOCK_SIZE);
|
||||
MiniDFSCluster cluster = new MiniDFSCluster.Builder(config)
|
||||
.numDataNodes(replicationFactor)
|
||||
.storagesPerDatanode(4)
|
||||
.build();
|
||||
|
||||
MiniDFSCluster cluster = new MiniDFSCluster.Builder(CONF)
|
||||
.numDataNodes(REPLICATION_FACTOR)
|
||||
.storagesPerDatanode(4)
|
||||
.build();
|
||||
try {
|
||||
cluster.waitActive();
|
||||
// the third block will not be visible to getBlocks
|
||||
long fileLen = 12 * DEFAULT_BLOCK_SIZE + 1;
|
||||
DFSTestUtil.createFile(cluster.getFileSystem(), new Path("/tmp.txt"),
|
||||
fileLen, REPLICATION_FACTOR, 0L);
|
||||
testFile = new Path(filePath);
|
||||
DFSTestUtil.createFile(cluster.getFileSystem(), testFile,
|
||||
fileLen, replicationFactor, 0L);
|
||||
|
||||
// get blocks & data nodes
|
||||
List<LocatedBlock> locatedBlocks;
|
||||
DatanodeInfo[] dataNodes = null;
|
||||
boolean notWritten;
|
||||
final DFSClient dfsclient = new DFSClient(
|
||||
DFSUtilClient.getNNAddress(CONF), CONF);
|
||||
do {
|
||||
locatedBlocks = dfsclient.getNamenode()
|
||||
.getBlockLocations("/tmp.txt", 0, fileLen).getLocatedBlocks();
|
||||
assertEquals(13, locatedBlocks.size());
|
||||
notWritten = false;
|
||||
for (int i = 0; i < 2; i++) {
|
||||
dataNodes = locatedBlocks.get(i).getLocations();
|
||||
if (dataNodes.length != REPLICATION_FACTOR) {
|
||||
notWritten = true;
|
||||
try {
|
||||
Thread.sleep(10);
|
||||
} catch (InterruptedException e) {
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (notWritten);
|
||||
dfsclient.close();
|
||||
|
||||
fs = cluster.getFileSystem();
|
||||
DFSTestUtil.waitForReplication(fs, testFile, replicationFactor, 60000);
|
||||
RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(testFile);
|
||||
LocatedFileStatus stat = it.next();
|
||||
BlockLocation[] blockLocations = stat.getBlockLocations();
|
||||
assertEquals(blkLocsSize, blockLocations.length);
|
||||
HdfsDataInputStream dis = (HdfsDataInputStream) fs.open(testFile);
|
||||
Collection<LocatedBlock> dinfo = dis.getAllBlocks();
|
||||
dis.close();
|
||||
DatanodeInfo[] dataNodes = dinfo.iterator().next().getLocations();
|
||||
// get RPC client to namenode
|
||||
InetSocketAddress addr = new InetSocketAddress("localhost",
|
||||
cluster.getNameNodePort());
|
||||
NamenodeProtocol namenode = NameNodeProxies.createProxy(CONF,
|
||||
NamenodeProtocol namenode = NameNodeProxies.createProxy(config,
|
||||
DFSUtilClient.getNNUri(addr), NamenodeProtocol.class).getProxy();
|
||||
|
||||
// get blocks of size fileLen from dataNodes[0], with minBlockSize as
|
||||
// fileLen
|
||||
BlockWithLocations[] locs;
|
||||
|
||||
// Should return all 13 blocks, as minBlockSize is not passed
|
||||
locs = namenode.getBlocks(dataNodes[0], fileLen, 0)
|
||||
.getBlocks();
|
||||
assertEquals(13, locs.length);
|
||||
assertEquals(locs[0].getStorageIDs().length, 2);
|
||||
assertEquals(locs[1].getStorageIDs().length, 2);
|
||||
locs = namenode.getBlocks(dataNodes[0], fileLen, 0).getBlocks();
|
||||
assertEquals(blkLocsSize, locs.length);
|
||||
|
||||
// Should return 12 blocks, as minBlockSize is DEFAULT_BLOCK_SIZE
|
||||
locs = namenode.getBlocks(dataNodes[0], fileLen, DEFAULT_BLOCK_SIZE)
|
||||
.getBlocks();
|
||||
assertEquals(12, locs.length);
|
||||
assertEquals(locs[0].getStorageIDs().length, 2);
|
||||
assertEquals(locs[1].getStorageIDs().length, 2);
|
||||
assertEquals(locs[0].getStorageIDs().length, replicationFactor);
|
||||
assertEquals(locs[1].getStorageIDs().length, replicationFactor);
|
||||
|
||||
// Should return 12 blocks, as minBlockSize is blkSize
|
||||
locs = namenode.getBlocks(dataNodes[0], fileLen, blkSize).getBlocks();
|
||||
assertEquals(blkLocsSize - 1, locs.length);
|
||||
assertEquals(locs[0].getStorageIDs().length, replicationFactor);
|
||||
assertEquals(locs[1].getStorageIDs().length, replicationFactor);
|
||||
|
||||
// get blocks of size BlockSize from dataNodes[0]
|
||||
locs = namenode.getBlocks(dataNodes[0], DEFAULT_BLOCK_SIZE,
|
||||
DEFAULT_BLOCK_SIZE).getBlocks();
|
||||
locs = namenode.getBlocks(dataNodes[0], blkSize,
|
||||
blkSize).getBlocks();
|
||||
assertEquals(locs.length, 1);
|
||||
assertEquals(locs[0].getStorageIDs().length, 2);
|
||||
assertEquals(locs[0].getStorageIDs().length, replicationFactor);
|
||||
|
||||
// get blocks of size 1 from dataNodes[0]
|
||||
locs = namenode.getBlocks(dataNodes[0], 1, 1).getBlocks();
|
||||
assertEquals(locs.length, 1);
|
||||
assertEquals(locs[0].getStorageIDs().length, 2);
|
||||
assertEquals(locs[0].getStorageIDs().length, replicationFactor);
|
||||
|
||||
// get blocks of size 0 from dataNodes[0]
|
||||
getBlocksWithException(namenode, dataNodes[0], 0, 0);
|
||||
getBlocksWithException(namenode, dataNodes[0], 0, 0,
|
||||
RemoteException.class, "IllegalArgumentException");
|
||||
|
||||
// get blocks of size -1 from dataNodes[0]
|
||||
getBlocksWithException(namenode, dataNodes[0], -1, 0);
|
||||
getBlocksWithException(namenode, dataNodes[0], -1, 0,
|
||||
RemoteException.class, "IllegalArgumentException");
|
||||
|
||||
// minBlockSize is -1
|
||||
getBlocksWithException(namenode, dataNodes[0], DEFAULT_BLOCK_SIZE, -1);
|
||||
getBlocksWithException(namenode, dataNodes[0], blkSize, -1,
|
||||
RemoteException.class, "IllegalArgumentException");
|
||||
|
||||
// get blocks of size BlockSize from a non-existent datanode
|
||||
DatanodeInfo info = DFSTestUtil.getDatanodeInfo("1.2.3.4");
|
||||
getBlocksWithIncorrectDatanodeException(namenode, info, 2, 0);
|
||||
|
||||
getBlocksWithException(namenode, info, replicationFactor, 0,
|
||||
RemoteException.class, "HadoopIllegalArgumentException");
|
||||
|
||||
testBlockIterator(cluster);
|
||||
} finally {
|
||||
|
||||
// Namenode should refuse to provide block locations to the balancer
|
||||
// while in safemode.
|
||||
locs = namenode.getBlocks(dataNodes[0], fileLen, 0).getBlocks();
|
||||
assertEquals(blkLocsSize, locs.length);
|
||||
assertFalse(fs.isInSafeMode());
|
||||
LOG.info("Entering safe mode");
|
||||
fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
|
||||
LOG.info("Entered safe mode");
|
||||
assertTrue(fs.isInSafeMode());
|
||||
getBlocksWithException(namenode, info, replicationFactor, 0,
|
||||
RemoteException.class,
|
||||
"Cannot execute getBlocks. Name node is in safe mode.");
|
||||
fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
|
||||
assertFalse(fs.isInSafeMode());
|
||||
} finally {
|
||||
if (fs != null) {
|
||||
fs.delete(testFile, true);
|
||||
fs.close();
|
||||
}
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
private void getBlocksWithException(NamenodeProtocol namenode,
|
||||
DatanodeInfo datanode, long size, long minBlockSize) throws IOException {
|
||||
boolean getException = false;
|
||||
try {
|
||||
namenode.getBlocks(datanode, size, minBlockSize);
|
||||
} catch (RemoteException e) {
|
||||
getException = true;
|
||||
assertTrue(e.getClassName().contains("IllegalArgumentException"));
|
||||
}
|
||||
assertTrue(getException);
|
||||
}
|
||||
DatanodeInfo datanode, long size, long minBlkSize, Class exClass,
|
||||
String msg) throws Exception {
|
||||
|
||||
private void getBlocksWithIncorrectDatanodeException(
|
||||
NamenodeProtocol namenode, DatanodeInfo datanode, long size,
|
||||
long minBlockSize)
|
||||
throws IOException {
|
||||
boolean getException = false;
|
||||
try {
|
||||
namenode.getBlocks(datanode, size, minBlockSize);
|
||||
} catch (RemoteException e) {
|
||||
getException = true;
|
||||
assertTrue(e.getClassName().contains("HadoopIllegalArgumentException"));
|
||||
}
|
||||
assertTrue(getException);
|
||||
// Namenode should refuse should fail
|
||||
LambdaTestUtils.intercept(exClass,
|
||||
msg, () -> namenode.getBlocks(datanode, size, minBlkSize));
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user