HDFS-2100. Improve TestStorageRestore. Contributed by Aaron T. Myers.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1138262 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3f190b3e1a
commit
a851ce5dcb
@ -519,6 +519,8 @@ Trunk (unreleased changes)
|
|||||||
HDFS-1568. Improve the log messages in DataXceiver. (Joey Echeverria via
|
HDFS-1568. Improve the log messages in DataXceiver. (Joey Echeverria via
|
||||||
szetszwo)
|
szetszwo)
|
||||||
|
|
||||||
|
HDFS-2100. Improve TestStorageRestore. (atm)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
HDFS-1458. Improve checkpoint performance by avoiding unnecessary image
|
HDFS-1458. Improve checkpoint performance by avoiding unnecessary image
|
||||||
|
@ -18,6 +18,10 @@
|
|||||||
package org.apache.hadoop.hdfs.server.namenode;
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
import java.io.DataInputStream;
|
import java.io.DataInputStream;
|
||||||
import java.io.EOFException;
|
import java.io.EOFException;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
@ -28,32 +32,35 @@
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
import java.util.Set;
|
||||||
import junit.framework.TestCase;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.cli.CLITestCmdDFS;
|
import org.apache.hadoop.cli.CLITestCmdDFS;
|
||||||
import org.apache.hadoop.cli.util.*;
|
import org.apache.hadoop.cli.util.CLICommandDFSAdmin;
|
||||||
|
import org.apache.hadoop.cli.util.CommandExecutor;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.FileUtil;
|
import org.apache.hadoop.fs.FileUtil;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
||||||
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
import org.apache.hadoop.hdfs.HdfsConfiguration;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
import org.apache.hadoop.hdfs.server.common.Storage;
|
import org.apache.hadoop.hdfs.server.common.Storage;
|
||||||
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
|
||||||
import org.apache.hadoop.hdfs.DFSConfigKeys;
|
import org.junit.After;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
/**
|
/**
|
||||||
* Startup and checkpoint tests
|
* Startup and checkpoint tests
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class TestStorageRestore extends TestCase {
|
public class TestStorageRestore {
|
||||||
public static final String NAME_NODE_HOST = "localhost:";
|
public static final String NAME_NODE_HOST = "localhost:";
|
||||||
public static final String NAME_NODE_HTTP_HOST = "0.0.0.0:";
|
public static final String NAME_NODE_HTTP_HOST = "0.0.0.0:";
|
||||||
private static final Log LOG =
|
private static final Log LOG =
|
||||||
@ -78,8 +85,8 @@ private void writeFile(FileSystem fileSys, Path name, int repl)
|
|||||||
stm.close();
|
stm.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
protected void setUp() throws Exception {
|
public void setUpNameDirs() throws Exception {
|
||||||
config = new HdfsConfiguration();
|
config = new HdfsConfiguration();
|
||||||
hdfsDir = new File(MiniDFSCluster.getBaseDirectory()).getCanonicalFile();
|
hdfsDir = new File(MiniDFSCluster.getBaseDirectory()).getCanonicalFile();
|
||||||
if ( hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir) ) {
|
if ( hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir) ) {
|
||||||
@ -116,7 +123,8 @@ protected void setUp() throws Exception {
|
|||||||
/**
|
/**
|
||||||
* clean up
|
* clean up
|
||||||
*/
|
*/
|
||||||
public void tearDown() throws Exception {
|
@After
|
||||||
|
public void cleanUpNameDirs() throws Exception {
|
||||||
if (hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir) ) {
|
if (hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir) ) {
|
||||||
throw new IOException("Could not delete hdfs directory in tearDown '" + hdfsDir + "'");
|
throw new IOException("Could not delete hdfs directory in tearDown '" + hdfsDir + "'");
|
||||||
}
|
}
|
||||||
@ -125,12 +133,13 @@ public void tearDown() throws Exception {
|
|||||||
/**
|
/**
|
||||||
* invalidate storage by removing storage directories
|
* invalidate storage by removing storage directories
|
||||||
*/
|
*/
|
||||||
public void invalidateStorage(FSImage fi) throws IOException {
|
public void invalidateStorage(FSImage fi, Set<File> filesToInvalidate) throws IOException {
|
||||||
ArrayList<StorageDirectory> al = new ArrayList<StorageDirectory>(2);
|
ArrayList<StorageDirectory> al = new ArrayList<StorageDirectory>(2);
|
||||||
Iterator<StorageDirectory> it = fi.getStorage().dirIterator();
|
Iterator<StorageDirectory> it = fi.getStorage().dirIterator();
|
||||||
while(it.hasNext()) {
|
while(it.hasNext()) {
|
||||||
StorageDirectory sd = it.next();
|
StorageDirectory sd = it.next();
|
||||||
if(sd.getRoot().equals(path2) || sd.getRoot().equals(path3)) {
|
if(filesToInvalidate.contains(sd.getRoot())) {
|
||||||
|
LOG.info("causing IO error on " + sd.getRoot());
|
||||||
al.add(sd);
|
al.add(sd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -142,16 +151,16 @@ public void invalidateStorage(FSImage fi) throws IOException {
|
|||||||
* test
|
* test
|
||||||
*/
|
*/
|
||||||
public void printStorages(FSImage fs) {
|
public void printStorages(FSImage fs) {
|
||||||
LOG.info("current storages and corresoponding sizes:");
|
LOG.info("current storages and corresponding sizes:");
|
||||||
for(Iterator<StorageDirectory> it = fs.getStorage().dirIterator(); it.hasNext(); ) {
|
for(Iterator<StorageDirectory> it = fs.getStorage().dirIterator(); it.hasNext(); ) {
|
||||||
StorageDirectory sd = it.next();
|
StorageDirectory sd = it.next();
|
||||||
|
|
||||||
if(sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) {
|
if(sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) {
|
||||||
File imf = fs.getStorage().getStorageFile(sd, NameNodeFile.IMAGE);
|
File imf = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE);
|
||||||
LOG.info(" image file " + imf.getAbsolutePath() + "; len = " + imf.length());
|
LOG.info(" image file " + imf.getAbsolutePath() + "; len = " + imf.length());
|
||||||
}
|
}
|
||||||
if(sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
|
if(sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
|
||||||
File edf = fs.getStorage().getStorageFile(sd, NameNodeFile.EDITS);
|
File edf = NNStorage.getStorageFile(sd, NameNodeFile.EDITS);
|
||||||
LOG.info(" edits file " + edf.getAbsolutePath() + "; len = " + edf.length());
|
LOG.info(" edits file " + edf.getAbsolutePath() + "; len = " + edf.length());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -282,6 +291,7 @@ public void checkFiles(boolean valid) throws IOException {
|
|||||||
* 8. verify that all the image and edits files are the same.
|
* 8. verify that all the image and edits files are the same.
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("deprecation")
|
@SuppressWarnings("deprecation")
|
||||||
|
@Test
|
||||||
public void testStorageRestore() throws Exception {
|
public void testStorageRestore() throws Exception {
|
||||||
int numDatanodes = 2;
|
int numDatanodes = 2;
|
||||||
cluster = new MiniDFSCluster.Builder(config).numDataNodes(numDatanodes)
|
cluster = new MiniDFSCluster.Builder(config).numDataNodes(numDatanodes)
|
||||||
@ -299,7 +309,7 @@ public void testStorageRestore() throws Exception {
|
|||||||
|
|
||||||
System.out.println("****testStorageRestore: file test written, invalidating storage...");
|
System.out.println("****testStorageRestore: file test written, invalidating storage...");
|
||||||
|
|
||||||
invalidateStorage(cluster.getNameNode().getFSImage());
|
invalidateStorage(cluster.getNameNode().getFSImage(), ImmutableSet.of(path2, path3));
|
||||||
//secondary.doCheckpoint(); // this will cause storages to be removed.
|
//secondary.doCheckpoint(); // this will cause storages to be removed.
|
||||||
printStorages(cluster.getNameNode().getFSImage());
|
printStorages(cluster.getNameNode().getFSImage());
|
||||||
System.out.println("****testStorageRestore: storage invalidated + doCheckpoint");
|
System.out.println("****testStorageRestore: storage invalidated + doCheckpoint");
|
||||||
@ -331,6 +341,7 @@ public void testStorageRestore() throws Exception {
|
|||||||
* Test dfsadmin -restoreFailedStorage command
|
* Test dfsadmin -restoreFailedStorage command
|
||||||
* @throws Exception
|
* @throws Exception
|
||||||
*/
|
*/
|
||||||
|
@Test
|
||||||
public void testDfsAdminCmd() throws Exception {
|
public void testDfsAdminCmd() throws Exception {
|
||||||
cluster = new MiniDFSCluster.Builder(config).
|
cluster = new MiniDFSCluster.Builder(config).
|
||||||
numDataNodes(2).
|
numDataNodes(2).
|
||||||
@ -362,7 +373,7 @@ public void testDfsAdminCmd() throws Exception {
|
|||||||
restore = fsi.getStorage().getRestoreFailedStorage();
|
restore = fsi.getStorage().getRestoreFailedStorage();
|
||||||
assertTrue("After set false call restore is " + restore, restore);
|
assertTrue("After set false call restore is " + restore, restore);
|
||||||
|
|
||||||
// run one more time - no change in value
|
// run one more time - no change in value
|
||||||
cmd = "-fs NAMENODE -restoreFailedStorage check";
|
cmd = "-fs NAMENODE -restoreFailedStorage check";
|
||||||
CommandExecutor.Result cmdResult = executor.executeCommand(cmd);
|
CommandExecutor.Result cmdResult = executor.executeCommand(cmd);
|
||||||
restore = fsi.getStorage().getRestoreFailedStorage();
|
restore = fsi.getStorage().getRestoreFailedStorage();
|
||||||
@ -376,4 +387,67 @@ public void testDfsAdminCmd() throws Exception {
|
|||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test to simulate interleaved checkpointing by 2 2NNs after a storage
|
||||||
|
* directory has been taken offline. The first will cause the directory to
|
||||||
|
* come back online, but it won't have any valid contents. The second 2NN will
|
||||||
|
* then try to perform a checkpoint. The NN should not serve up the image or
|
||||||
|
* edits from the restored (empty) dir.
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
|
@Test
|
||||||
|
public void testMultipleSecondaryCheckpoint() throws IOException {
|
||||||
|
|
||||||
|
SecondaryNameNode secondary = null;
|
||||||
|
try {
|
||||||
|
cluster = new MiniDFSCluster.Builder(config).numDataNodes(1)
|
||||||
|
.manageNameDfsDirs(false).build();
|
||||||
|
cluster.waitActive();
|
||||||
|
|
||||||
|
secondary = new SecondaryNameNode(config);
|
||||||
|
|
||||||
|
FSImage fsImage = cluster.getNameNode().getFSImage();
|
||||||
|
printStorages(fsImage);
|
||||||
|
|
||||||
|
FileSystem fs = cluster.getFileSystem();
|
||||||
|
Path testPath = new Path("/", "test");
|
||||||
|
writeFile(fs, testPath, 2);
|
||||||
|
|
||||||
|
printStorages(fsImage);
|
||||||
|
|
||||||
|
// Take name1 offline
|
||||||
|
invalidateStorage(fsImage, ImmutableSet.of(path1));
|
||||||
|
|
||||||
|
// Simulate a 2NN beginning a checkpoint, but not finishing. This will
|
||||||
|
// cause name1 to be restored.
|
||||||
|
cluster.getNameNode().rollEditLog();
|
||||||
|
|
||||||
|
printStorages(fsImage);
|
||||||
|
|
||||||
|
// Now another 2NN comes along to do a full checkpoint.
|
||||||
|
secondary.doCheckpoint();
|
||||||
|
|
||||||
|
printStorages(fsImage);
|
||||||
|
|
||||||
|
// The created file should still exist in the in-memory FS state after the
|
||||||
|
// checkpoint.
|
||||||
|
assertTrue("path exists before restart", fs.exists(testPath));
|
||||||
|
|
||||||
|
secondary.shutdown();
|
||||||
|
|
||||||
|
// Restart the NN so it reloads the edits from disk.
|
||||||
|
cluster.restartNameNode();
|
||||||
|
|
||||||
|
// The created file should still exist after the restart.
|
||||||
|
assertTrue("path should still exist after restart", fs.exists(testPath));
|
||||||
|
} finally {
|
||||||
|
if (cluster != null) {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
if (secondary != null) {
|
||||||
|
secondary.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user