HDFS-2011. Removal and restoration of storage directories on checkpointing failure doesn't work properly. Contributed by Ravi Prakash.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1141748 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3af51887b4
commit
7bd41f031f
@ -556,6 +556,9 @@ Trunk (unreleased changes)
|
|||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
||||||
|
HDFS-2011. Removal and restoration of storage directories on checkpointing
|
||||||
|
failure doesn't work properly. (Ravi Prakash via mattf)
|
||||||
|
|
||||||
HDFS-1955. FSImage.doUpgrade() was made too fault-tolerant by HDFS-1826.
|
HDFS-1955. FSImage.doUpgrade() was made too fault-tolerant by HDFS-1826.
|
||||||
(mattf)
|
(mattf)
|
||||||
|
|
||||||
|
@ -122,19 +122,31 @@ void create() throws IOException {
|
|||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
// close should have been called after all pending transactions
|
// close should have been called after all pending transactions
|
||||||
// have been flushed & synced.
|
// have been flushed & synced.
|
||||||
|
// if already closed, just skip
|
||||||
|
if(bufCurrent != null)
|
||||||
|
{
|
||||||
int bufSize = bufCurrent.size();
|
int bufSize = bufCurrent.size();
|
||||||
if (bufSize != 0) {
|
if (bufSize != 0) {
|
||||||
throw new IOException("FSEditStream has " + bufSize
|
throw new IOException("FSEditStream has " + bufSize
|
||||||
+ " bytes still to be flushed and cannot " + "be closed.");
|
+ " bytes still to be flushed and cannot " + "be closed.");
|
||||||
}
|
}
|
||||||
bufCurrent.close();
|
bufCurrent.close();
|
||||||
|
bufCurrent = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(bufReady != null) {
|
||||||
bufReady.close();
|
bufReady.close();
|
||||||
|
bufReady = null;
|
||||||
|
}
|
||||||
|
|
||||||
// remove the last INVALID marker from transaction log.
|
// remove the last INVALID marker from transaction log.
|
||||||
|
if (fc != null && fc.isOpen()) {
|
||||||
fc.truncate(fc.position());
|
fc.truncate(fc.position());
|
||||||
|
fc.close();
|
||||||
|
}
|
||||||
|
if (fp != null) {
|
||||||
fp.close();
|
fp.close();
|
||||||
|
}
|
||||||
bufCurrent = bufReady = null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -511,6 +511,12 @@ public void setCheckpointTimeInStorage(long newCpT) {
|
|||||||
// Close any edits stream associated with this dir and remove directory
|
// Close any edits stream associated with this dir and remove directory
|
||||||
LOG.warn("incrementCheckpointTime failed on "
|
LOG.warn("incrementCheckpointTime failed on "
|
||||||
+ sd.getRoot().getPath() + ";type="+sd.getStorageDirType());
|
+ sd.getRoot().getPath() + ";type="+sd.getStorageDirType());
|
||||||
|
try {
|
||||||
|
reportErrorsOnDirectory(sd);
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
LOG.error("Failed to report and remove NN storage directory "
|
||||||
|
+ sd.getRoot().getPath(), ioe);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.net.InetSocketAddress;
|
import java.net.InetSocketAddress;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
@ -137,6 +138,72 @@ private void testNamedirError(Configuration conf, Collection<URI> namedirs)
|
|||||||
resurrectNameDir(first); // put back namedir
|
resurrectNameDir(first); // put back namedir
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests EditLogFileOutputStream doesn't throw NullPointerException on being
|
||||||
|
* closed twice.
|
||||||
|
* See https://issues.apache.org/jira/browse/HDFS-2011
|
||||||
|
*/
|
||||||
|
public void testEditLogFileOutputStreamCloses()
|
||||||
|
throws IOException,NullPointerException {
|
||||||
|
System.out.println("Testing EditLogFileOutputStream doesn't throw " +
|
||||||
|
"NullPointerException on being closed twice");
|
||||||
|
File editLogStreamFile = null;
|
||||||
|
try {
|
||||||
|
editLogStreamFile = new File(System.getProperty("test.build.data","/tmp"),
|
||||||
|
"editLogStream.dat");
|
||||||
|
EditLogFileOutputStream editLogStream =
|
||||||
|
new EditLogFileOutputStream(editLogStreamFile, 0);
|
||||||
|
editLogStream.close();
|
||||||
|
//Closing an twice should not throw a NullPointerException
|
||||||
|
editLogStream.close();
|
||||||
|
} finally {
|
||||||
|
if (editLogStreamFile != null)
|
||||||
|
// Cleanup the editLogStream.dat file we created
|
||||||
|
editLogStreamFile.delete();
|
||||||
|
}
|
||||||
|
System.out.println("Successfully tested EditLogFileOutputStream doesn't " +
|
||||||
|
"throw NullPointerException on being closed twice");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks that an IOException in NNStorage.setCheckpointTimeInStorage is handled
|
||||||
|
* correctly (by removing the storage directory)
|
||||||
|
* See https://issues.apache.org/jira/browse/HDFS-2011
|
||||||
|
*/
|
||||||
|
public void testSetCheckpointTimeInStorageHandlesIOException() throws Exception {
|
||||||
|
System.out.println("Check IOException handled correctly by setCheckpointTimeInStorage");
|
||||||
|
NNStorage nnStorage = new NNStorage(new HdfsConfiguration());
|
||||||
|
ArrayList<URI> fsImageDirs = new ArrayList<URI>();
|
||||||
|
ArrayList<URI> editsDirs = new ArrayList<URI>();
|
||||||
|
File filePath =
|
||||||
|
new File(System.getProperty("test.build.data","/tmp"), "storageDirToCheck");
|
||||||
|
assertTrue("Couldn't create directory storageDirToCheck",
|
||||||
|
filePath.exists() || filePath.mkdirs());
|
||||||
|
try {
|
||||||
|
fsImageDirs.add(filePath.toURI());
|
||||||
|
editsDirs.add(filePath.toURI());
|
||||||
|
// Initialize NNStorage
|
||||||
|
nnStorage.setStorageDirectories(fsImageDirs, editsDirs);
|
||||||
|
assertTrue("List of storage directories didn't have storageDirToCheck.",
|
||||||
|
nnStorage.getEditsDirectories().iterator().next().
|
||||||
|
toString().indexOf("storageDirToCheck") != -1);
|
||||||
|
assertTrue("List of removed storage directories wasn't empty",
|
||||||
|
nnStorage.getRemovedStorageDirs().isEmpty());
|
||||||
|
} finally {
|
||||||
|
// Delete storage directory to cause IOException in setCheckpointTimeInStorage
|
||||||
|
assertTrue("Couldn't remove directory " + filePath.getAbsolutePath(),
|
||||||
|
filePath.delete());
|
||||||
|
}
|
||||||
|
// Just call setCheckpointTimeInStorage using any random number
|
||||||
|
nnStorage.setCheckpointTimeInStorage(1);
|
||||||
|
List<StorageDirectory> listRsd = nnStorage.getRemovedStorageDirs();
|
||||||
|
assertTrue("Removed directory wasn't what was expected",
|
||||||
|
listRsd.size() > 0 && listRsd.get(listRsd.size() - 1).getRoot().
|
||||||
|
toString().indexOf("storageDirToCheck") != -1);
|
||||||
|
System.out.println("Successfully checked IOException is handled correctly "
|
||||||
|
+ "by setCheckpointTimeInStorage");
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Simulate namenode crashing after rolling edit log.
|
* Simulate namenode crashing after rolling edit log.
|
||||||
*/
|
*/
|
||||||
|
Loading…
Reference in New Issue
Block a user