HDFS-2100. Improve TestStorageRestore. Contributed by Aaron T. Myers.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1138262 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Aaron Myers 2011-06-22 00:14:49 +00:00
parent 3f190b3e1a
commit a851ce5dcb
2 changed files with 92 additions and 16 deletions

View File

@ -519,6 +519,8 @@ Trunk (unreleased changes)
HDFS-1568. Improve the log messages in DataXceiver. (Joey Echeverria via HDFS-1568. Improve the log messages in DataXceiver. (Joey Echeverria via
szetszwo) szetszwo)
HDFS-2100. Improve TestStorageRestore. (atm)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-1458. Improve checkpoint performance by avoiding unnecessary image HDFS-1458. Improve checkpoint performance by avoiding unnecessary image

View File

@ -18,6 +18,10 @@
package org.apache.hadoop.hdfs.server.namenode; package org.apache.hadoop.hdfs.server.namenode;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.DataInputStream; import java.io.DataInputStream;
import java.io.EOFException; import java.io.EOFException;
import java.io.File; import java.io.File;
@ -28,32 +32,35 @@
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import java.util.Random; import java.util.Random;
import java.util.Set;
import junit.framework.TestCase;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.cli.CLITestCmdDFS; import org.apache.hadoop.cli.CLITestCmdDFS;
import org.apache.hadoop.cli.util.*; import org.apache.hadoop.cli.util.CLICommandDFSAdmin;
import org.apache.hadoop.cli.util.CommandExecutor;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
import org.apache.hadoop.hdfs.DFSConfigKeys; import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import com.google.common.collect.ImmutableSet;
/** /**
* Startup and checkpoint tests * Startup and checkpoint tests
* *
*/ */
public class TestStorageRestore extends TestCase { public class TestStorageRestore {
public static final String NAME_NODE_HOST = "localhost:"; public static final String NAME_NODE_HOST = "localhost:";
public static final String NAME_NODE_HTTP_HOST = "0.0.0.0:"; public static final String NAME_NODE_HTTP_HOST = "0.0.0.0:";
private static final Log LOG = private static final Log LOG =
@ -78,8 +85,8 @@ private void writeFile(FileSystem fileSys, Path name, int repl)
stm.close(); stm.close();
} }
@Before
protected void setUp() throws Exception { public void setUpNameDirs() throws Exception {
config = new HdfsConfiguration(); config = new HdfsConfiguration();
hdfsDir = new File(MiniDFSCluster.getBaseDirectory()).getCanonicalFile(); hdfsDir = new File(MiniDFSCluster.getBaseDirectory()).getCanonicalFile();
if ( hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir) ) { if ( hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir) ) {
@ -116,7 +123,8 @@ protected void setUp() throws Exception {
/** /**
* clean up * clean up
*/ */
public void tearDown() throws Exception { @After
public void cleanUpNameDirs() throws Exception {
if (hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir) ) { if (hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir) ) {
throw new IOException("Could not delete hdfs directory in tearDown '" + hdfsDir + "'"); throw new IOException("Could not delete hdfs directory in tearDown '" + hdfsDir + "'");
} }
@ -125,12 +133,13 @@ public void tearDown() throws Exception {
/** /**
* invalidate storage by removing storage directories * invalidate storage by removing storage directories
*/ */
public void invalidateStorage(FSImage fi) throws IOException { public void invalidateStorage(FSImage fi, Set<File> filesToInvalidate) throws IOException {
ArrayList<StorageDirectory> al = new ArrayList<StorageDirectory>(2); ArrayList<StorageDirectory> al = new ArrayList<StorageDirectory>(2);
Iterator<StorageDirectory> it = fi.getStorage().dirIterator(); Iterator<StorageDirectory> it = fi.getStorage().dirIterator();
while(it.hasNext()) { while(it.hasNext()) {
StorageDirectory sd = it.next(); StorageDirectory sd = it.next();
if(sd.getRoot().equals(path2) || sd.getRoot().equals(path3)) { if(filesToInvalidate.contains(sd.getRoot())) {
LOG.info("causing IO error on " + sd.getRoot());
al.add(sd); al.add(sd);
} }
} }
@ -142,16 +151,16 @@ public void invalidateStorage(FSImage fi) throws IOException {
* test * test
*/ */
public void printStorages(FSImage fs) { public void printStorages(FSImage fs) {
LOG.info("current storages and corresoponding sizes:"); LOG.info("current storages and corresponding sizes:");
for(Iterator<StorageDirectory> it = fs.getStorage().dirIterator(); it.hasNext(); ) { for(Iterator<StorageDirectory> it = fs.getStorage().dirIterator(); it.hasNext(); ) {
StorageDirectory sd = it.next(); StorageDirectory sd = it.next();
if(sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) { if(sd.getStorageDirType().isOfType(NameNodeDirType.IMAGE)) {
File imf = fs.getStorage().getStorageFile(sd, NameNodeFile.IMAGE); File imf = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE);
LOG.info(" image file " + imf.getAbsolutePath() + "; len = " + imf.length()); LOG.info(" image file " + imf.getAbsolutePath() + "; len = " + imf.length());
} }
if(sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) { if(sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
File edf = fs.getStorage().getStorageFile(sd, NameNodeFile.EDITS); File edf = NNStorage.getStorageFile(sd, NameNodeFile.EDITS);
LOG.info(" edits file " + edf.getAbsolutePath() + "; len = " + edf.length()); LOG.info(" edits file " + edf.getAbsolutePath() + "; len = " + edf.length());
} }
} }
@ -282,6 +291,7 @@ public void checkFiles(boolean valid) throws IOException {
* 8. verify that all the image and edits files are the same. * 8. verify that all the image and edits files are the same.
*/ */
@SuppressWarnings("deprecation") @SuppressWarnings("deprecation")
@Test
public void testStorageRestore() throws Exception { public void testStorageRestore() throws Exception {
int numDatanodes = 2; int numDatanodes = 2;
cluster = new MiniDFSCluster.Builder(config).numDataNodes(numDatanodes) cluster = new MiniDFSCluster.Builder(config).numDataNodes(numDatanodes)
@ -299,7 +309,7 @@ public void testStorageRestore() throws Exception {
System.out.println("****testStorageRestore: file test written, invalidating storage..."); System.out.println("****testStorageRestore: file test written, invalidating storage...");
invalidateStorage(cluster.getNameNode().getFSImage()); invalidateStorage(cluster.getNameNode().getFSImage(), ImmutableSet.of(path2, path3));
//secondary.doCheckpoint(); // this will cause storages to be removed. //secondary.doCheckpoint(); // this will cause storages to be removed.
printStorages(cluster.getNameNode().getFSImage()); printStorages(cluster.getNameNode().getFSImage());
System.out.println("****testStorageRestore: storage invalidated + doCheckpoint"); System.out.println("****testStorageRestore: storage invalidated + doCheckpoint");
@ -331,6 +341,7 @@ public void testStorageRestore() throws Exception {
* Test dfsadmin -restoreFailedStorage command * Test dfsadmin -restoreFailedStorage command
* @throws Exception * @throws Exception
*/ */
@Test
public void testDfsAdminCmd() throws Exception { public void testDfsAdminCmd() throws Exception {
cluster = new MiniDFSCluster.Builder(config). cluster = new MiniDFSCluster.Builder(config).
numDataNodes(2). numDataNodes(2).
@ -362,7 +373,7 @@ public void testDfsAdminCmd() throws Exception {
restore = fsi.getStorage().getRestoreFailedStorage(); restore = fsi.getStorage().getRestoreFailedStorage();
assertTrue("After set false call restore is " + restore, restore); assertTrue("After set false call restore is " + restore, restore);
// run one more time - no change in value // run one more time - no change in value
cmd = "-fs NAMENODE -restoreFailedStorage check"; cmd = "-fs NAMENODE -restoreFailedStorage check";
CommandExecutor.Result cmdResult = executor.executeCommand(cmd); CommandExecutor.Result cmdResult = executor.executeCommand(cmd);
restore = fsi.getStorage().getRestoreFailedStorage(); restore = fsi.getStorage().getRestoreFailedStorage();
@ -376,4 +387,67 @@ public void testDfsAdminCmd() throws Exception {
cluster.shutdown(); cluster.shutdown();
} }
} }
/**
* Simulates interleaved checkpointing by two SecondaryNameNodes (2NNs) after
* a storage directory has been taken offline.
* <p>
* Sequence exercised:
* <ol>
* <li>write a file, then invalidate the storage directory {@code path1};</li>
* <li>roll the edit log directly on the NN, standing in for a first 2NN that
* begins a checkpoint but never finishes it. This is expected to bring the
* directory back online, but without any valid contents;</li>
* <li>run a full checkpoint from a second 2NN;</li>
* <li>restart the NN so that it reloads its state from disk.</li>
* </ol>
* The NN should not serve up the image or edits from the restored (empty)
* dir, so the file must be visible both before and after the restart.
*
* @throws IOException on any I/O failure while driving the cluster or the 2NN
*/
@SuppressWarnings("deprecation")
@Test
public void testMultipleSecondaryCheckpoint() throws IOException {
SecondaryNameNode secondary = null;
try {
// manageNameDfsDirs(false): the builder is told not to manage the name
// dirs itself, so the values already present in config are used.
cluster = new MiniDFSCluster.Builder(config).numDataNodes(1)
.manageNameDfsDirs(false).build();
cluster.waitActive();
secondary = new SecondaryNameNode(config);
FSImage fsImage = cluster.getNameNode().getFSImage();
printStorages(fsImage);
FileSystem fs = cluster.getFileSystem();
Path testPath = new Path("/", "test");
// NOTE(review): a replication factor of 2 is requested although the cluster
// above is built with a single datanode - confirm this is intended.
writeFile(fs, testPath, 2);
printStorages(fsImage);
// Take name1 offline by invalidating only the path1 storage directory.
invalidateStorage(fsImage, ImmutableSet.of(path1));
// Simulate a 2NN beginning a checkpoint, but not finishing: only the edit
// log roll is issued, directly against the NN. This will cause name1 to be
// restored.
cluster.getNameNode().rollEditLog();
printStorages(fsImage);
// Now another 2NN comes along to do a full checkpoint.
secondary.doCheckpoint();
printStorages(fsImage);
// The created file should still exist in the in-memory FS state after the
// checkpoint.
assertTrue("path exists before restart", fs.exists(testPath));
// NOTE(review): the 2NN is shut down here and again in the finally block
// below; presumably shutdown() tolerates a second call - confirm.
secondary.shutdown();
// Restart the NN so it reloads the edits from on-disk.
cluster.restartNameNode();
// The created file should still exist after the restart. The FileSystem
// handle obtained before the restart is reused for this check.
assertTrue("path should still exist after restart", fs.exists(testPath));
} finally {
// Always release the cluster and the 2NN, even if an assertion failed.
if (cluster != null) {
cluster.shutdown();
}
if (secondary != null) {
secondary.shutdown();
}
}
}
} }