From 2bb650146ddb36830ea9c0d248fd3df1f6aa7534 Mon Sep 17 00:00:00 2001 From: Vinayakumar B Date: Fri, 25 Jul 2014 07:02:53 +0000 Subject: [PATCH] HDFS-5919. FileJournalManager doesn't purge empty and corrupt inprogress edits files (vinayakumarb) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1613355 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../server/namenode/FileJournalManager.java | 31 +++++++++++++++++-- .../TestNNStorageRetentionManager.java | 11 +++++-- 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 498454916d..9fb5ba132e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -370,6 +370,9 @@ Release 2.6.0 - UNRELEASED HDFS-6715. Webhdfs wont fail over when it gets java.io.IOException: Namenode is in startup mode. (jing9) + HDFS-5919. FileJournalManager doesn't purge empty and corrupt inprogress edits + files (vinayakumarb) + Release 2.5.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index a41ff1390c..362c316cc2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -71,6 +71,8 @@ public class FileJournalManager implements JournalManager { NameNodeFile.EDITS.getName() + "_(\\d+)-(\\d+)"); private static final Pattern EDITS_INPROGRESS_REGEX = Pattern.compile( NameNodeFile.EDITS_INPROGRESS.getName() + "_(\\d+)"); + private static final Pattern EDITS_INPROGRESS_STALE_REGEX = Pattern.compile( + NameNodeFile.EDITS_INPROGRESS.getName() + "_(\\d+).*(\\S+)"); private File currentInProgress = null; @@ -162,8 +164,7 @@ public class FileJournalManager implements JournalManager { throws IOException { LOG.info("Purging logs older than " + minTxIdToKeep); File[] files = FileUtil.listFiles(sd.getCurrentDir()); - List editLogs = - FileJournalManager.matchEditLogs(files); + List editLogs = matchEditLogs(files, true); for (EditLogFile log : editLogs) { if (log.getFirstTxId() < minTxIdToKeep && log.getLastTxId() < minTxIdToKeep) { @@ -244,8 +245,13 @@ public class FileJournalManager implements JournalManager { public static List matchEditLogs(File logDir) throws IOException { return matchEditLogs(FileUtil.listFiles(logDir)); } - + static List matchEditLogs(File[] filesInStorage) { + return matchEditLogs(filesInStorage, false); + } + + private static List matchEditLogs(File[] filesInStorage, + boolean forPurging) { List ret = Lists.newArrayList(); for (File f : filesInStorage) { String name = f.getName(); @@ -256,6 +262,7 @@ public class FileJournalManager implements JournalManager { long startTxId = Long.parseLong(editsMatch.group(1)); long endTxId = Long.parseLong(editsMatch.group(2)); ret.add(new EditLogFile(f, startTxId, endTxId)); + continue; } catch (NumberFormatException nfe) { LOG.error("Edits file " + f + " has improperly formatted " + "transaction ID"); @@ -270,12 +277,30 @@ public class FileJournalManager implements JournalManager { long startTxId = Long.parseLong(inProgressEditsMatch.group(1)); ret.add( new EditLogFile(f, startTxId, HdfsConstants.INVALID_TXID, true)); + continue; } catch (NumberFormatException nfe) { LOG.error("In-progress edits file " + f + " has improperly " + "formatted transaction ID"); // skip } } + if (forPurging) { + // Check for in-progress stale edits + Matcher staleInprogressEditsMatch = EDITS_INPROGRESS_STALE_REGEX + .matcher(name); + if (staleInprogressEditsMatch.matches()) { + try { + long startTxId = Long.valueOf(staleInprogressEditsMatch.group(1)); + ret.add(new EditLogFile(f, startTxId, HdfsConstants.INVALID_TXID, + true)); + continue; + } catch (NumberFormatException nfe) { + LOG.error("In-progress stale edits file " + f + " has improperly " + + "formatted transaction ID"); + // skip + } + } + } } return ret; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java index 8f08ef39f8..346d94962b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java @@ -212,18 +212,25 @@ public class TestNNStorageRetentionManager { tc.addImage("/foo1/current/" + getImageFileName(300), false); tc.addImage("/foo1/current/" + getImageFileName(400), false); + // Segments containing txns upto txId 250 are extra and should be purged. tc.addLog("/foo2/current/" + getFinalizedEditsFileName(1, 100), true); - // Without lowering the max segments to retain, we'd retain all segments - // going back to txid 150 (300 - 150). tc.addLog("/foo2/current/" + getFinalizedEditsFileName(101, 175), true); + tc.addLog("/foo2/current/" + getInProgressEditsFileName(176) + ".empty", + true); tc.addLog("/foo2/current/" + getFinalizedEditsFileName(176, 200), true); tc.addLog("/foo2/current/" + getFinalizedEditsFileName(201, 225), true); + tc.addLog("/foo2/current/" + getInProgressEditsFileName(226) + ".corrupt", + true); tc.addLog("/foo2/current/" + getFinalizedEditsFileName(226, 240), true); // Only retain 2 extra segments. The 301-350 and 351-400 segments are // considered required, not extra. tc.addLog("/foo2/current/" + getFinalizedEditsFileName(241, 275), false); tc.addLog("/foo2/current/" + getFinalizedEditsFileName(276, 300), false); + tc.addLog("/foo2/current/" + getInProgressEditsFileName(301) + ".empty", + false); tc.addLog("/foo2/current/" + getFinalizedEditsFileName(301, 350), false); + tc.addLog("/foo2/current/" + getInProgressEditsFileName(351) + ".corrupt", + false); tc.addLog("/foo2/current/" + getFinalizedEditsFileName(351, 400), false); tc.addLog("/foo2/current/" + getInProgressEditsFileName(401), false); runTest(tc);