From 56d2ef6f5ed25055f19eb61e02c52fb9237a78b7 Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Mon, 11 Jun 2012 18:01:38 +0000 Subject: [PATCH] HDFS-2797. Fix misuses of InputStream#skip in the edit log code. Contributed by Colin Patrick McCabe git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1348945 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../namenode/EditLogFileInputStream.java | 3 +- .../hdfs/server/namenode/FSEditLogLoader.java | 5 ++++ .../hdfs/server/namenode/FSEditLogOp.java | 7 +++-- .../hdfs/server/namenode/StreamLimiter.java | 5 ++++ .../ha/TestEditLogsDuringFailover.java | 30 +++++++++++++++++-- 6 files changed, 48 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0277f829a0..aa62e91d13 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -308,6 +308,9 @@ Branch-2 ( Unreleased changes ) HDFS-3490. DatanodeWebHdfsMethods throws NullPointerException if NamenodeRpcAddressParam is not set. (szetszwo) + HDFS-2797. Fix misuses of InputStream#skip in the edit log code. + (Colin Patrick McCabe via eli) + BREAKDOWN OF HDFS-3042 SUBTASKS HDFS-2185. HDFS portion of ZK-based FailoverController (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java index e6ddf5b920..b224c863c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java @@ -165,7 +165,8 @@ private FSEditLogOp nextOpImpl(boolean skipBrokenEdits) throws IOException { LOG.warn("skipping " + skipAmt + " bytes at the end " + "of edit log '" + getName() + "': reached txid " + txId + " out of " + lastTxId); - tracker.skip(skipAmt); + tracker.clearLimit(); + IOUtils.skipFully(tracker, skipAmt); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index e1b26bb810..343472dfa1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -751,6 +751,11 @@ public void setLimit(long limit) { limitPos = curPos + limit; } + @Override + public void clearLimit() { + limitPos = Long.MAX_VALUE; + } + @Override public void mark(int limit) { super.mark(limit); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java index 489f030e13..80f637c499 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java @@ -44,6 +44,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableFactories; @@ -2289,9 +2290,11 @@ public FSEditLogOp readOp(boolean skipBrokenEdits) throws IOException { // 0xff, we want to skip over that region, because there's nothing // interesting there. long numSkip = e.getNumAfterTerminator(); - if (in.skip(numSkip) < numSkip) { + try { + IOUtils.skipFully(in, numSkip); + } catch (Throwable t) { FSImage.LOG.error("Failed to skip " + numSkip + " bytes of " + - "garbage after an OP_INVALID. Unexpected early EOF."); + "garbage after an OP_INVALID.", t); return null; } } catch (IOException e) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StreamLimiter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StreamLimiter.java index 97420828d4..4e533eb0c7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StreamLimiter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StreamLimiter.java @@ -27,4 +27,9 @@ interface StreamLimiter { * Set a limit. Calling this function clears any existing limit. */ public void setLimit(long limit); + + /** + * Disable limit. + */ + public void clearLimit(); } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java index 79dcec4d43..794a3b6bf3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java @@ -20,6 +20,7 @@ import static org.junit.Assert.*; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.net.URI; import java.util.Collections; @@ -35,6 +36,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; import org.apache.hadoop.hdfs.server.namenode.NNStorage; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.test.GenericTestUtils; import org.junit.Test; @@ -118,8 +120,8 @@ public void testStartup() throws Exception { } } - @Test - public void testFailoverFinalizesAndReadsInProgress() throws Exception { + private void testFailoverFinalizesAndReadsInProgress( + boolean partialTxAtEnd) throws Exception { Configuration conf = new Configuration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) @@ -130,8 +132,21 @@ public void testFailoverFinalizesAndReadsInProgress() throws Exception { URI sharedUri = cluster.getSharedEditsDir(0, 1); File sharedDir = new File(sharedUri.getPath(), "current"); FSImageTestUtil.createAbortedLogWithMkdirs(sharedDir, NUM_DIRS_IN_LOG, 1); + assertEditFiles(Collections.singletonList(sharedUri), NNStorage.getInProgressEditsFileName(1)); + if (partialTxAtEnd) { + FileOutputStream outs = null; + try { + File editLogFile = + new File(sharedDir, NNStorage.getInProgressEditsFileName(1)); + outs = new FileOutputStream(editLogFile, true); + outs.write(new byte[] { 0x18, 0x00, 0x00, 0x00 } ); + LOG.error("editLogFile = " + editLogFile); + } finally { + IOUtils.cleanup(LOG, outs); + } + } // Transition one of the NNs to active cluster.transitionToActive(0); @@ -149,7 +164,18 @@ public void testFailoverFinalizesAndReadsInProgress() throws Exception { } finally { cluster.shutdown(); } + } + + @Test + public void testFailoverFinalizesAndReadsInProgressSimple() + throws Exception { + testFailoverFinalizesAndReadsInProgress(false); + } + @Test + public void testFailoverFinalizesAndReadsInProgressWithPartialTxAtEnd() + throws Exception { + testFailoverFinalizesAndReadsInProgress(true); } /**