HDFS-2909. HA: Inaccessible shared edits dir not getting removed from FSImage storage dirs upon error. Contributed by Bikas Saha.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1244753 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1fb0ab92f8
commit
3c145d3492
@ -202,3 +202,5 @@ HDFS-2947. On startup NN throws an NPE in the metrics system. (atm)
|
||||
HDFS-2942. TestActiveStandbyElectorRealZK fails if build dir does not exist. (atm)
|
||||
|
||||
HDFS-2948. NN throws NPE during shutdown if it fails to startup (todd)
|
||||
|
||||
HDFS-2909. HA: Inaccessible shared edits dir not getting removed from FSImage storage dirs upon error. (Bikas Saha via jitendra)
|
||||
|
@ -221,7 +221,7 @@ private void initJournals(List<URI> dirs) {
|
||||
if (u.getScheme().equals(NNStorage.LOCAL_URI_SCHEME)) {
|
||||
StorageDirectory sd = storage.getStorageDirectory(u);
|
||||
if (sd != null) {
|
||||
journalSet.add(new FileJournalManager(sd), required);
|
||||
journalSet.add(new FileJournalManager(sd, storage), required);
|
||||
}
|
||||
} else {
|
||||
journalSet.add(createJournal(u), required);
|
||||
|
@ -52,6 +52,7 @@ class FileJournalManager implements JournalManager {
|
||||
private static final Log LOG = LogFactory.getLog(FileJournalManager.class);
|
||||
|
||||
private final StorageDirectory sd;
|
||||
private final NNStorage storage;
|
||||
private int outputBufferCapacity = 512*1024;
|
||||
|
||||
private static final Pattern EDITS_REGEX = Pattern.compile(
|
||||
@ -65,8 +66,9 @@ class FileJournalManager implements JournalManager {
|
||||
StoragePurger purger
|
||||
= new NNStorageRetentionManager.DeletionStoragePurger();
|
||||
|
||||
public FileJournalManager(StorageDirectory sd) {
|
||||
public FileJournalManager(StorageDirectory sd, NNStorage storage) {
|
||||
this.sd = sd;
|
||||
this.storage = storage;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -75,11 +77,16 @@ public void close() throws IOException {}
|
||||
@Override
|
||||
synchronized public EditLogOutputStream startLogSegment(long txid)
|
||||
throws IOException {
|
||||
try {
|
||||
currentInProgress = NNStorage.getInProgressEditsFile(sd, txid);
|
||||
EditLogOutputStream stm = new EditLogFileOutputStream(currentInProgress,
|
||||
outputBufferCapacity);
|
||||
stm.create();
|
||||
return stm;
|
||||
} catch (IOException e) {
|
||||
storage.reportErrorsOnDirectory(sd);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -95,6 +102,7 @@ synchronized public void finalizeLogSegment(long firstTxId, long lastTxId)
|
||||
"Can't finalize edits file " + inprogressFile + " since finalized file " +
|
||||
"already exists");
|
||||
if (!inprogressFile.renameTo(dstFile)) {
|
||||
storage.reportErrorsOnDirectory(sd);
|
||||
throw new IllegalStateException("Unable to finalize edits file " + inprogressFile);
|
||||
}
|
||||
if (inprogressFile.equals(currentInProgress)) {
|
||||
|
@ -29,6 +29,7 @@
|
||||
import java.io.FilenameFilter;
|
||||
import java.io.IOException;
|
||||
import org.junit.Test;
|
||||
import org.apache.hadoop.fs.FileUtil;
|
||||
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
||||
import org.apache.hadoop.hdfs.server.namenode.JournalManager.CorruptionException;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
||||
@ -59,7 +60,7 @@ public void testNormalOperation() throws IOException {
|
||||
|
||||
long numJournals = 0;
|
||||
for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.EDITS)) {
|
||||
FileJournalManager jm = new FileJournalManager(sd);
|
||||
FileJournalManager jm = new FileJournalManager(sd, storage);
|
||||
assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true));
|
||||
numJournals++;
|
||||
}
|
||||
@ -79,7 +80,7 @@ public void testInprogressRecovery() throws IOException {
|
||||
5, new AbortSpec(5, 0));
|
||||
StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next();
|
||||
|
||||
FileJournalManager jm = new FileJournalManager(sd);
|
||||
FileJournalManager jm = new FileJournalManager(sd, storage);
|
||||
assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL,
|
||||
jm.getNumberOfTransactions(1, true));
|
||||
}
|
||||
@ -102,16 +103,16 @@ public void testInprogressRecoveryMixed() throws IOException {
|
||||
5, new AbortSpec(5, 1));
|
||||
Iterator<StorageDirectory> dirs = storage.dirIterator(NameNodeDirType.EDITS);
|
||||
StorageDirectory sd = dirs.next();
|
||||
FileJournalManager jm = new FileJournalManager(sd);
|
||||
FileJournalManager jm = new FileJournalManager(sd, storage);
|
||||
assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true));
|
||||
|
||||
sd = dirs.next();
|
||||
jm = new FileJournalManager(sd);
|
||||
jm = new FileJournalManager(sd, storage);
|
||||
assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1,
|
||||
true));
|
||||
|
||||
sd = dirs.next();
|
||||
jm = new FileJournalManager(sd);
|
||||
jm = new FileJournalManager(sd, storage);
|
||||
assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true));
|
||||
}
|
||||
|
||||
@ -135,17 +136,17 @@ public void testInprogressRecoveryAll() throws IOException {
|
||||
new AbortSpec(5, 2));
|
||||
Iterator<StorageDirectory> dirs = storage.dirIterator(NameNodeDirType.EDITS);
|
||||
StorageDirectory sd = dirs.next();
|
||||
FileJournalManager jm = new FileJournalManager(sd);
|
||||
FileJournalManager jm = new FileJournalManager(sd, storage);
|
||||
assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1,
|
||||
true));
|
||||
|
||||
sd = dirs.next();
|
||||
jm = new FileJournalManager(sd);
|
||||
jm = new FileJournalManager(sd, storage);
|
||||
assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1,
|
||||
true));
|
||||
|
||||
sd = dirs.next();
|
||||
jm = new FileJournalManager(sd);
|
||||
jm = new FileJournalManager(sd, storage);
|
||||
assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1,
|
||||
true));
|
||||
}
|
||||
@ -162,6 +163,25 @@ private void corruptAfterStartSegment(File f) throws IOException {
|
||||
raf.close();
|
||||
}
|
||||
|
||||
@Test(expected=IllegalStateException.class)
|
||||
public void testFinalizeErrorReportedToNNStorage() throws IOException, InterruptedException {
|
||||
File f = new File(TestEditLog.TEST_DIR + "/filejournaltestError");
|
||||
// abort after 10th roll
|
||||
NNStorage storage = setupEdits(Collections.<URI>singletonList(f.toURI()),
|
||||
10, new AbortSpec(10, 0));
|
||||
StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next();
|
||||
|
||||
FileJournalManager jm = new FileJournalManager(sd, storage);
|
||||
String sdRootPath = sd.getRoot().getAbsolutePath();
|
||||
FileUtil.chmod(sdRootPath, "-w", true);
|
||||
try {
|
||||
jm.finalizeLogSegment(0, 1);
|
||||
} finally {
|
||||
assertTrue(storage.getRemovedStorageDirs().contains(sd));
|
||||
FileUtil.chmod(sdRootPath, "+w", true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that we can read from a stream created by FileJournalManager.
|
||||
* Create a single edits directory, failing it on the final roll.
|
||||
@ -176,7 +196,7 @@ public void testReadFromStream() throws IOException {
|
||||
10, new AbortSpec(10, 0));
|
||||
StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next();
|
||||
|
||||
FileJournalManager jm = new FileJournalManager(sd);
|
||||
FileJournalManager jm = new FileJournalManager(sd, storage);
|
||||
long expectedTotalTxnCount = TXNS_PER_ROLL*10 + TXNS_PER_FAIL;
|
||||
assertEquals(expectedTotalTxnCount, jm.getNumberOfTransactions(1, true));
|
||||
|
||||
@ -211,7 +231,7 @@ public void testAskForTransactionsMidfile() throws IOException {
|
||||
10);
|
||||
StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next();
|
||||
|
||||
FileJournalManager jm = new FileJournalManager(sd);
|
||||
FileJournalManager jm = new FileJournalManager(sd, storage);
|
||||
|
||||
// 10 rolls, so 11 rolled files, 110 txids total.
|
||||
final int TOTAL_TXIDS = 10 * 11;
|
||||
@ -248,7 +268,7 @@ public boolean accept(File dir, String name) {
|
||||
assertEquals(1, files.length);
|
||||
assertTrue(files[0].delete());
|
||||
|
||||
FileJournalManager jm = new FileJournalManager(sd);
|
||||
FileJournalManager jm = new FileJournalManager(sd, storage);
|
||||
assertEquals(startGapTxId-1, jm.getNumberOfTransactions(1, true));
|
||||
|
||||
try {
|
||||
@ -286,7 +306,7 @@ public boolean accept(File dir, String name) {
|
||||
|
||||
corruptAfterStartSegment(files[0]);
|
||||
|
||||
FileJournalManager jm = new FileJournalManager(sd);
|
||||
FileJournalManager jm = new FileJournalManager(sd, storage);
|
||||
assertEquals(10*TXNS_PER_ROLL+1,
|
||||
jm.getNumberOfTransactions(1, true));
|
||||
}
|
||||
@ -300,7 +320,8 @@ public void testGetRemoteEditLog() throws IOException {
|
||||
NNStorage.getInProgressEditsFileName(201),
|
||||
NNStorage.getFinalizedEditsFileName(1001, 1100));
|
||||
|
||||
FileJournalManager fjm = new FileJournalManager(sd);
|
||||
// passing null for NNStorage because this unit test will not use it
|
||||
FileJournalManager fjm = new FileJournalManager(sd, null);
|
||||
assertEquals("[1,100],[101,200],[1001,1100]", getLogsAsString(fjm, 1));
|
||||
assertEquals("[101,200],[1001,1100]", getLogsAsString(fjm, 101));
|
||||
assertEquals("[1001,1100]", getLogsAsString(fjm, 201));
|
||||
@ -336,7 +357,7 @@ public void testReadFromMiddleOfEditLog() throws CorruptionException,
|
||||
10);
|
||||
StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next();
|
||||
|
||||
FileJournalManager jm = new FileJournalManager(sd);
|
||||
FileJournalManager jm = new FileJournalManager(sd, storage);
|
||||
|
||||
EditLogInputStream elis = jm.getInputStream(5, true);
|
||||
FSEditLogOp op = elis.readOp();
|
||||
@ -357,7 +378,7 @@ public void testExcludeInProgressStreams() throws CorruptionException,
|
||||
10, false);
|
||||
StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next();
|
||||
|
||||
FileJournalManager jm = new FileJournalManager(sd);
|
||||
FileJournalManager jm = new FileJournalManager(sd, storage);
|
||||
|
||||
// If we exclude the in-progress stream, we should only have 100 tx.
|
||||
assertEquals(100, jm.getNumberOfTransactions(1, false));
|
||||
|
@ -292,8 +292,9 @@ public FSEditLog mockEditLog(StoragePurger purger) {
|
||||
for (FakeRoot root : dirRoots.values()) {
|
||||
if (!root.type.isOfType(NameNodeDirType.EDITS)) continue;
|
||||
|
||||
// passing null NNStorage for unit test because it does not use it
|
||||
FileJournalManager fjm = new FileJournalManager(
|
||||
root.mockStorageDir());
|
||||
root.mockStorageDir(), null);
|
||||
fjm.purger = purger;
|
||||
jms.add(fjm);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user