From 298e867673b1bbf33dbde828896ea3a332f4ec7c Mon Sep 17 00:00:00 2001
From: Todd Lipcon
Date: Wed, 11 Jan 2012 06:08:13 +0000
Subject: [PATCH] HDFS-2773. Reading edit logs from an earlier version should not leave blocks in under-construction state. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1229900 13f79535-47bb-0310-9956-ffa450edef68
---
 .../hadoop-hdfs/CHANGES.HDFS-1623.txt              |   2 +
 .../hdfs/server/namenode/FSEditLogLoader.java      |  14 +++-
 .../namenode/INodeFileUnderConstruction.java       |  21 +++++
 .../apache/hadoop/hdfs/TestPersistBlocks.java      |  73 ++++++++++++++++++
 .../resources/hadoop-1.0-multiblock-file.tgz       | Bin 0 -> 2811 bytes
 5 files changed, 108 insertions(+), 2 deletions(-)
 create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1.0-multiblock-file.tgz

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
index fae2f31304..0b01c22d54 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
@@ -95,3 +95,5 @@ HDFS-2762. Fix TestCheckpoint timing out on HA branch. (Uma Maheswara Rao G via
 HDFS-2724. NN web UI can throw NPE after startup, before standby state is entered. (todd)
 
 HDFS-2753. Fix standby getting stuck in safemode when blocks are written while SBN is down. (Hari Mankude and todd via todd)
+
+HDFS-2773. Reading edit logs from an earlier version should not leave blocks in under-construction state. (todd)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
index e1394e630b..56d2fcb588 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
@@ -467,7 +467,7 @@ private void updateBlocks(FSDirectory fsDir, AddCloseOp addCloseOp,
       BlockInfo oldBlock = oldBlocks[i];
       Block newBlock = addCloseOp.blocks[i];
       
-      boolean isLastBlock = i == oldBlocks.length - 1;
+      boolean isLastBlock = i == addCloseOp.blocks.length - 1;
       if (oldBlock.getBlockId() != newBlock.getBlockId() ||
           (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() &&
            !(isGenStampUpdate && isLastBlock))) {
@@ -504,7 +504,17 @@ private void updateBlocks(FSDirectory fsDir, AddCloseOp addCloseOp,
       // We're adding blocks
       for (int i = oldBlocks.length; i < addCloseOp.blocks.length; i++) {
         Block newBlock = addCloseOp.blocks[i];
-        BlockInfo newBI = new BlockInfoUnderConstruction(newBlock, file.getReplication());
+        BlockInfo newBI;
+        if (addCloseOp.opCode == FSEditLogOpCodes.OP_ADD){
+          newBI = new BlockInfoUnderConstruction(
+              newBlock, file.getReplication());
+        } else {
+          // OP_CLOSE should add finalized blocks. This code path
+          // is only executed when loading edits written by prior
+          // versions of Hadoop. Current versions always log
+          // OP_ADD operations as each block is allocated.
+          newBI = new BlockInfo(newBlock, file.getReplication());
+        }
         fsNamesys.getBlockManager().addINode(newBI, file);
         file.addBlock(newBI);
       }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java
index 2440c4dd12..d3f918bf3d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java
@@ -26,6 +26,8 @@
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
 
+import com.google.common.base.Joiner;
+
 /**
  * I-node for file being written.
  */
@@ -94,6 +96,9 @@ public boolean isUnderConstruction() {
   // use the modification time as the access time
   //
   INodeFile convertToInodeFile() {
+    assert allBlocksComplete() :
+      "Can't finalize inode " + this + " since it contains " +
+      "non-complete blocks! Blocks are: " + blocksAsString();
     INodeFile obj = new INodeFile(getPermissionStatus(),
                                   getBlocks(),
                                   getReplication(),
@@ -103,6 +108,18 @@ INodeFile convertToInodeFile() {
     return obj;
     
   }
+
+  /**
+   * @return true if all of the blocks in this file are marked as completed.
+   */
+  private boolean allBlocksComplete() {
+    for (BlockInfo b : blocks) {
+      if (!b.isComplete()) {
+        return false;
+      }
+    }
+    return true;
+  }
 
   /**
    * Remove a block from the block list. This block should be
@@ -141,4 +158,8 @@ public BlockInfoUnderConstruction setLastBlock(BlockInfo lastBlock,
     setBlock(numBlocks()-1, ucBlock);
     return ucBlock;
   }
+
+  private String blocksAsString() {
+    return Joiner.on(",").join(this.blocks);
+  }
 }
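As a rough standalone sketch of the idea behind the two NameNode-side changes above (not part of the patch itself, and using hypothetical stand-in types rather than the real BlockInfo/BlockInfoUnderConstruction classes): blocks first seen while replaying OP_ADD are still being written and stay under construction, whereas blocks first seen in an OP_CLOSE, as older edit logs such as the bundled Hadoop 1.0 image write them, should be materialized as finalized, which is what the new allBlocksComplete() check guards when the file is converted to a plain INodeFile.

import java.util.ArrayList;
import java.util.List;

public class EditReplaySketch {
  // Hypothetical stand-ins for illustration only; not the HDFS classes.
  enum OpCode { OP_ADD, OP_CLOSE }

  static class Block {
    final long id;
    final boolean complete;
    Block(long id, boolean complete) { this.id = id; this.complete = complete; }
  }

  // Mirrors the intent of the FSEditLogLoader change: only OP_ADD produces
  // under-construction blocks; OP_CLOSE (old-style logs) produces finalized ones.
  static Block materialize(OpCode op, long blockId) {
    return new Block(blockId, op == OpCode.OP_CLOSE);
  }

  // Mirrors the allBlocksComplete() guard added to INodeFileUnderConstruction.
  static boolean allBlocksComplete(List<Block> blocks) {
    for (Block b : blocks) {
      if (!b.complete) {
        return false;
      }
    }
    return true;
  }

  public static void main(String[] args) {
    List<Block> blocks = new ArrayList<Block>();
    // An old-style edit log may introduce every block of a file in one OP_CLOSE.
    for (long id = 1; id <= 4; id++) {
      blocks.add(materialize(OpCode.OP_CLOSE, id));
    }
    System.out.println("can finalize: " + allBlocksComplete(blocks));
  }
}

Running the sketch prints "can finalize: true"; switching the opcode to OP_ADD flips it to false, mirroring the stuck under-construction state the bug used to leave behind.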
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java
index dd1ff016a8..cb989298fa 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java
@@ -23,22 +23,34 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLog;
 import org.apache.hadoop.hdfs.server.namenode.FSImage;
+import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.INodeFileUnderConstruction;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.log4j.Level;
 
+import java.io.File;
 import java.io.IOException;
+import java.net.URI;
+import java.util.Collection;
+import java.util.List;
 import java.util.Random;
 
 import static org.junit.Assert.*;
 import org.junit.Test;
+import com.google.common.collect.Lists;
+
 
 /**
  * A JUnit test for checking if restarting DFS preserves the
  * blocks that are part of an unclosed file.
@@ -57,6 +69,9 @@ public class TestPersistBlocks {
   static final byte[] DATA_BEFORE_RESTART = new byte[BLOCK_SIZE * NUM_BLOCKS];
   static final byte[] DATA_AFTER_RESTART = new byte[BLOCK_SIZE * NUM_BLOCKS];
+
+  private static final String HADOOP_1_0_MULTIBLOCK_TGZ =
+      "hadoop-1.0-multiblock-file.tgz";
 
   static {
     Random rand = new Random();
     rand.nextBytes(DATA_BEFORE_RESTART);
@@ -277,4 +292,62 @@ public void testRestartWithAppend() throws IOException {
       if (cluster != null) { cluster.shutdown(); }
     }
   }
+
+  /**
+   * Earlier versions of HDFS didn't persist block allocation to the edit log.
+   * This makes sure that we can still load an edit log when the OP_CLOSE
+   * is the opcode which adds all of the blocks. This is a regression
+   * test for HDFS-2773.
+   * This test uses a tarred pseudo-distributed cluster from Hadoop 1.0
+   * which has a multi-block file. This is similar to the tests in
+   * {@link TestDFSUpgradeFromImage} but none of those images include
+   * a multi-block file.
+   */
+  @Test
+  public void testEarlierVersionEditLog() throws Exception {
+    final Configuration conf = new HdfsConfiguration();
+
+    String tarFile = System.getProperty("test.cache.data", "build/test/cache")
+      + "/" + HADOOP_1_0_MULTIBLOCK_TGZ;
+    String testDir = System.getProperty("test.build.data", "build/test/data");
+    File dfsDir = new File(testDir, "image-1.0");
+    if (dfsDir.exists() && !FileUtil.fullyDelete(dfsDir)) {
+      throw new IOException("Could not delete dfs directory '" + dfsDir + "'");
+    }
+    FileUtil.unTar(new File(tarFile), new File(testDir));
+
+    File nameDir = new File(dfsDir, "name");
+    GenericTestUtils.assertExists(nameDir);
+    File dataDir = new File(dfsDir, "data");
+    GenericTestUtils.assertExists(dataDir);
+
+    conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath());
+    conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dataDir.getAbsolutePath());
+
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
+      .format(false)
+      .manageDataDfsDirs(false)
+      .manageNameDfsDirs(false)
+      .numDataNodes(1)
+      .startupOption(StartupOption.UPGRADE)
+      .build();
+    try {
+      FileSystem fs = cluster.getFileSystem();
+      Path testPath = new Path("/user/todd/4blocks");
+      // Read it without caring about the actual data within - we just need
+      // to make sure that the block states and locations are OK.
+      DFSTestUtil.readFile(fs, testPath);
+
+      // Ensure that we can append to it - if the blocks were in some funny
+      // state we'd get some kind of issue here.
+ FSDataOutputStream stm = fs.append(testPath); + try { + stm.write(1); + } finally { + IOUtils.closeStream(stm); + } + } finally { + cluster.shutdown(); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1.0-multiblock-file.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1.0-multiblock-file.tgz new file mode 100644 index 0000000000000000000000000000000000000000..8e327c2f3c82dca39dadf5d2d70f982e7ac9c1e0 GIT binary patch literal 2811 zcmeH}`#;qA9>=xYp_V9WUE66-9YhSWZZUIeV^)P!JIt7J+iA#cT{0Ma+sGWZ)FQdg zF3VJQlFPUhO_^N6m_>F9!(mLWLo;JAnfZRs+JEEp{_*|(<@I>}@P0iJ*}GIciqySS zJ{R%5!VPf;RRXB-T46t>-%vOII`^sWo$qWQmw4CTEJ~lmF+F@DOlMBbI*q)#7Ub95 zc(yB$JK+%Rc2ftVNpSeOp5X9ek0_!m)N$>_PXe=?{DP)P9s5i{$ihbTLa(4W@i^Qu zRa$H>aHr7Qa03|i=^$ZedbM-E8{3rINvC>Usx;LSIG-7|eVFdO|5oU~dMzUhZ!c?R za8mbP%HZeb=GT<2!Mc~WAn~d#IV=0&PI@L(c27{%TJ~=RDexG~;HR?YHRyg?o;u)& zfe`xnR{bWo(^?E!y@i6Eh&i7T&W;2N5JaYlI4=xG=tirL*Z}szeyG968KzJuH0)O_ z>FeqKwzQXpT;qX8dxO*_^6cWg^r2=v#T9Ct;)^&g&;J;`dgk_PKrtt;OtyD8mD?D< zhY|-i!eKhD?b#i0{KI=jq)9@{yWO2`dxgCGCWUT0M7A*1dO5TBz*8fMgN=?&JZcuK zT@#IsU@*y9)lZ!C;yqX}25D=kf+Y z)sGOM^@3u(gTVhN)yJ&PVns76RcKDsxaR%mchVz|dFHXxQp~#Ah|dPye*zG$1l6$F zH_U;765xelm{?@ZM$0n>HLZ^j)AWm*#h`%zSyvNSWmf{)x(r%6{l02Lmybamfng^LKFU z7KaOLP_Pz&M>i%n8Kfx+gg0LG?T& zDuhf{Q)e(LWlwk!-p;cma(K)CJaQjUOH*bJUgMgE1dCK z5;8alroT>rsfm_=G|lnjdwz&))Je}h)K_P+AZy*X-2?w&n?An1`05v4_yPN(qRM0a zowkFI7|eCg`u$inb|mX}=diq_uHnU8Kl7#9mLi`=;~Wg1KiyG@4k4d7D|IJyEPKrE zL^1*5-VQWIKVO2*>COWZhfE;||GNkJ|J%U3*)P_X`-eAC)1(E~)_}d%ri2pD90H^O zuu1dz(-&T4IYW8eM$ajbfBG&=D^3VhaQOo1@9rX~><5gPJ7WWf;<2BLfQC^M7-cfn z4<-CDX?ZgyA@+I%@jAm<$P!Od(7Pv}r_2rJP5S%cG*$Tlt)oV&2&1xzJ&*WmFSSYy z`H{$Z**HJdYFTyqRKdmJYG)2^cwR$jFY)mmnNmw`i|I9Z^{rdiog9*UJWO z*5S>~HH1B$Ou)s6k+mm1{`LWO`Q+O8jgcPoW7iq9POzB8;zzg`C9^`vcF;tW7S)yX z#pv`U5oDVMVbw2+X@BWLOZ=8H$bhGy#edv13ANA#I0s|cupAqe!m{>7=B~}F?)oYlIlK*uuUd zGZx^la6LbEEHh_smfZo~Kld<`@KL{o`r3UK?XsIx1RicdSHFc>!=AlBo`mg}S`Ew` ziJJ9xQC|`l8{9ZP3sv-jcXvQt)5t+UW8~yZ(MczP7e@(g|DcO8eel%r!srZ`b{Zi6 zTD*ue8Ue8@B-kZlSP4aIyU3K>kTnH$K#I63kk5hY{0czcoM6Sgk?`dY?q*a`n@=<4 zrh~ej5|a|HTSt312CcDC*H91(6L07%h3f~Ue;!A4a#}{y?`Nbm^!4f6w-R2Z<|8@fmrn7+R U_E|**pka0Ryx#?t*1anK11!+myZ`_I literal 0 HcmV?d00001
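A small worked example of the one-line isLastBlock change in the first FSEditLogLoader hunk (a hedged, standalone sketch rather than patch content): when an OP_CLOSE from an older edit log carries more blocks than the NameNode currently has in memory, the generation-stamp exemption should be keyed to the last block of the incoming op, not the last block of the shorter in-memory list.

public class IsLastBlockSketch {
  public static void main(String[] args) {
    long[] oldBlocks = {1001L};                // blocks the NameNode already tracks
    long[] opBlocks  = {1001L, 1002L, 1003L};  // blocks carried by the OP_CLOSE

    // The loop only visits blocks present on both sides, as in updateBlocks().
    for (int i = 0; i < oldBlocks.length; i++) {
      boolean beforeFix = (i == oldBlocks.length - 1);  // old check: true at i=0
      boolean afterFix  = (i == opBlocks.length - 1);   // fixed check: false at i=0
      System.out.printf("i=%d before-fix=%b after-fix=%b%n", i, beforeFix, afterFix);
    }
  }
}

With the old check, block 1001 looked like the file's last block at i=0, so a generation-stamp mismatch there could be waved through during a gen-stamp update even though the op still carries two more blocks; the fixed check only relaxes the comparison for the op's true last block.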