HDFS-2011. Removal and restoration of storage directories on checkpointing failure doesn't work properly. Contributed by Ravi Prakash.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1141748 13f79535-47bb-0310-9956-ffa450edef68
parent 3af51887b4
commit 7bd41f031f
@@ -556,6 +556,9 @@ Trunk (unreleased changes)
 
   BUG FIXES
 
+    HDFS-2011. Removal and restoration of storage directories on checkpointing
+    failure doesn't work properly. (Ravi Prakash via mattf)
+
     HDFS-1955. FSImage.doUpgrade() was made too fault-tolerant by HDFS-1826.
     (mattf)
 
@@ -122,19 +122,31 @@ void create() throws IOException {
   public void close() throws IOException {
     // close should have been called after all pending transactions
     // have been flushed & synced.
-    int bufSize = bufCurrent.size();
-    if (bufSize != 0) {
-      throw new IOException("FSEditStream has " + bufSize
-          + " bytes still to be flushed and cannot " + "be closed.");
+    // if already closed, just skip
+    if(bufCurrent != null)
+    {
+      int bufSize = bufCurrent.size();
+      if (bufSize != 0) {
+        throw new IOException("FSEditStream has " + bufSize
+            + " bytes still to be flushed and cannot " + "be closed.");
+      }
+      bufCurrent.close();
+      bufCurrent = null;
     }
-    bufCurrent.close();
-    bufReady.close();
+
+    if(bufReady != null) {
+      bufReady.close();
+      bufReady = null;
+    }
 
     // remove the last INVALID marker from transaction log.
-    fc.truncate(fc.position());
-    fp.close();
-
-    bufCurrent = bufReady = null;
+    if (fc != null && fc.isOpen()) {
+      fc.truncate(fc.position());
+      fc.close();
+    }
+    if (fp != null) {
+      fp.close();
+    }
   }
 
   /**
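The close() rewrite above amounts to making close() idempotent: each resource is checked for null before use and nulled out once released, so a second call becomes a no-op instead of a NullPointerException. A minimal stand-alone sketch of that pattern (the class and field names below are invented for illustration, not HDFS code):

import java.io.Closeable;
import java.io.FileOutputStream;
import java.io.IOException;

// Illustrative only: the resource is guarded by a null check and
// nulled out once closed, so repeated close() calls are harmless.
class GuardedStream implements Closeable {
  private FileOutputStream out; // null once closed

  GuardedStream(FileOutputStream out) {
    this.out = out;
  }

  @Override
  public void close() throws IOException {
    // if already closed, just skip (same guard as the patch)
    if (out != null) {
      out.close();
      out = null; // mark closed so the next call does nothing
    }
  }
}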
@@ -511,6 +511,12 @@ public void setCheckpointTimeInStorage(long newCpT) {
         // Close any edits stream associated with this dir and remove directory
         LOG.warn("incrementCheckpointTime failed on "
             + sd.getRoot().getPath() + ";type="+sd.getStorageDirType());
+        try {
+          reportErrorsOnDirectory(sd);
+        } catch (IOException ioe) {
+          LOG.error("Failed to report and remove NN storage directory "
+              + sd.getRoot().getPath(), ioe);
+        }
       }
     }
   }
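The NNStorage change applies a second pattern worth naming: when handling one storage directory's failure, a failure in the error-reporting path itself is caught and logged rather than rethrown, so processing of the remaining directories is never cut short. A self-contained sketch under that assumption, with invented names standing in for the HDFS ones:

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

// Illustrative sketch: reportAndRemove() stands in for
// reportErrorsOnDirectory(sd); its own IOException is contained so
// every directory still gets processed.
class ContainedErrorReporting {
  static void reportAndRemove(String dir) throws IOException {
    throw new IOException("could not remove " + dir); // simulate failure
  }

  public static void main(String[] args) {
    List<String> dirs = Arrays.asList("/data/nn1", "/data/nn2");
    for (String dir : dirs) {
      try {
        reportAndRemove(dir);
      } catch (IOException secondary) {
        // log and keep going, as the patch does with LOG.error(...)
        System.err.println("Failed to report and remove " + dir
            + ": " + secondary.getMessage());
      }
    }
  }
}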
@@ -21,6 +21,7 @@
 import java.io.*;
 import java.net.InetSocketAddress;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.List;
 import java.util.Iterator;
@@ -137,6 +138,72 @@ private void testNamedirError(Configuration conf, Collection<URI> namedirs)
     resurrectNameDir(first); // put back namedir
   }
 
+  /**
+   * Tests that EditLogFileOutputStream doesn't throw NullPointerException on
+   * being closed twice.
+   * See https://issues.apache.org/jira/browse/HDFS-2011
+   */
+  public void testEditLogFileOutputStreamCloses()
+    throws IOException,NullPointerException {
+    System.out.println("Testing EditLogFileOutputStream doesn't throw " +
+        "NullPointerException on being closed twice");
+    File editLogStreamFile = null;
+    try {
+      editLogStreamFile = new File(System.getProperty("test.build.data","/tmp"),
+          "editLogStream.dat");
+      EditLogFileOutputStream editLogStream =
+          new EditLogFileOutputStream(editLogStreamFile, 0);
+      editLogStream.close();
+      // Closing it twice should not throw a NullPointerException
+      editLogStream.close();
+    } finally {
+      if (editLogStreamFile != null)
+        // Cleanup the editLogStream.dat file we created
+        editLogStreamFile.delete();
+    }
+    System.out.println("Successfully tested EditLogFileOutputStream doesn't " +
+        "throw NullPointerException on being closed twice");
+  }
+
+  /**
+   * Checks that an IOException in NNStorage.setCheckpointTimeInStorage is
+   * handled correctly (by removing the storage directory).
+   * See https://issues.apache.org/jira/browse/HDFS-2011
+   */
+  public void testSetCheckpointTimeInStorageHandlesIOException() throws Exception {
+    System.out.println("Check IOException handled correctly by setCheckpointTimeInStorage");
+    NNStorage nnStorage = new NNStorage(new HdfsConfiguration());
+    ArrayList<URI> fsImageDirs = new ArrayList<URI>();
+    ArrayList<URI> editsDirs = new ArrayList<URI>();
+    File filePath =
+        new File(System.getProperty("test.build.data","/tmp"), "storageDirToCheck");
+    assertTrue("Couldn't create directory storageDirToCheck",
+        filePath.exists() || filePath.mkdirs());
+    try {
+      fsImageDirs.add(filePath.toURI());
+      editsDirs.add(filePath.toURI());
+      // Initialize NNStorage
+      nnStorage.setStorageDirectories(fsImageDirs, editsDirs);
+      assertTrue("List of storage directories didn't have storageDirToCheck.",
+          nnStorage.getEditsDirectories().iterator().next().
+          toString().indexOf("storageDirToCheck") != -1);
+      assertTrue("List of removed storage directories wasn't empty",
+          nnStorage.getRemovedStorageDirs().isEmpty());
+    } finally {
+      // Delete storage directory to cause IOException in setCheckpointTimeInStorage
+      assertTrue("Couldn't remove directory " + filePath.getAbsolutePath(),
+          filePath.delete());
+    }
+    // Just call setCheckpointTimeInStorage using any random number
+    nnStorage.setCheckpointTimeInStorage(1);
+    List<StorageDirectory> listRsd = nnStorage.getRemovedStorageDirs();
+    assertTrue("Removed directory wasn't what was expected",
+        listRsd.size() > 0 && listRsd.get(listRsd.size() - 1).getRoot().
+        toString().indexOf("storageDirToCheck") != -1);
+    System.out.println("Successfully checked IOException is handled correctly "
+        + "by setCheckpointTimeInStorage");
+  }
+
   /*
    * Simulate namenode crashing after rolling edit log.
    */
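The double-close check in testEditLogFileOutputStreamCloses generalizes: java.io.Closeable documents that closing an already-closed stream has no effect, so the same assertion can be applied to any Closeable. A small stand-alone sketch of that reusable check (not part of this patch):

import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;

// Stand-alone sketch, not part of the patch: verifies that a
// Closeable tolerates being closed twice, as the Closeable contract
// requires and as the patched EditLogFileOutputStream now does.
class DoubleCloseCheck {
  static void assertCloseIsIdempotent(Closeable c) throws IOException {
    c.close();
    c.close(); // must not throw (e.g. NullPointerException)
  }

  public static void main(String[] args) throws IOException {
    assertCloseIsIdempotent(new ByteArrayOutputStream());
    System.out.println("close() is idempotent");
  }
}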