From d920cdab91781e547932234d52807ba5a59eb120 Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Tue, 23 Jun 2015 22:29:28 -0700 Subject: [PATCH] HDFS-8567. Erasure Coding: SafeMode handles file smaller than a full stripe. Contributed by Walter Su. --- .../hadoop-hdfs/CHANGES-HDFS-EC-7285.txt | 3 + .../blockmanagement/BlockInfoStriped.java | 18 +++ .../server/blockmanagement/BlockManager.java | 30 ++-- .../hdfs/server/namenode/FSNamesystem.java | 2 +- .../org/apache/hadoop/hdfs/TestSafeMode.java | 11 -- .../hdfs/TestSafeModeWithStripedFile.java | 150 ++++++++++++++++++ 6 files changed, 181 insertions(+), 33 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeModeWithStripedFile.java

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt index 2c91dad62d..7f624252ec 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt @@ -317,3 +317,6 @@ HDFS-8602. Erasure Coding: Client can't read(decode) the EC files which have corrupt blocks. (jing9 and Kai Sasaki) + + HDFS-8567. Erasure Coding: SafeMode handles file smaller than a full stripe. + (Walter Su via jing9)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoStriped.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoStriped.java index 4cede91281..07e29f8a71 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoStriped.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoStriped.java @@ -73,6 +73,24 @@ public short getParityBlockNum() { return (short) this.schema.getNumParityUnits(); } + /** + * If the block is committed/completed and its length is less than a full + * stripe, it returns the number of actual data blocks. + * Otherwise it returns the number of data units specified by the schema. + */ + public short getRealDataBlockNum() { + if (isComplete() || getBlockUCState() == BlockUCState.COMMITTED) { + return (short) Math.min(getDataBlockNum(), + (getNumBytes() - 1) / BLOCK_STRIPED_CELL_SIZE + 1); + } else { + return getDataBlockNum(); + } + } + + public short getRealTotalBlockNum() { + return (short) (getRealDataBlockNum() + getParityBlockNum()); + } + public ECSchema getSchema() { return schema; }
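To make the ceiling division in getRealDataBlockNum() concrete, here is a minimal standalone sketch (not part of the patch; the 6+3 schema matches the HdfsConstants defaults the new test relies on, while the 64 KB cell size is an assumed value for illustration only):

public class RealBlockNumSketch {
  static final int DATA_BLK_NUM = 6;      // data units in the assumed 6+3 schema
  static final int PARITY_BLK_NUM = 3;    // parity units in the assumed 6+3 schema
  static final int CELL_SIZE = 64 * 1024; // assumed striping cell size

  // Mirrors the patched logic for a committed/complete block group: ceiling
  // division of the group length by the cell size, capped at the schema's
  // data-unit count.
  static short realDataBlockNum(long numBytes) {
    return (short) Math.min(DATA_BLK_NUM, (numBytes - 1) / CELL_SIZE + 1);
  }

  public static void main(String[] args) {
    System.out.println(realDataBlockNum(CELL_SIZE));       // 1: exactly one cell
    System.out.println(realDataBlockNum(CELL_SIZE + 1));   // 2: spills into a second cell
    System.out.println(realDataBlockNum(5L * CELL_SIZE));  // 5: matches the second test case below
    System.out.println(realDataBlockNum(12L * CELL_SIZE)); // 6: capped at a full stripe
  }
}

Since getRealTotalBlockNum() adds the parity units on top, a committed one-cell block group expects 1 + 3 = 4 storages rather than the full 6 + 3 = 9.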
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 2533ca54e1..d57e6f1c81 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -592,13 +592,16 @@ public int getMaxReplicationStreams() { } public int getDefaultStorageNum(BlockInfo block) { - return block.isStriped() ? - ((BlockInfoStriped) block).getTotalBlockNum() : defaultReplication; + if (block.isStriped()) { + return ((BlockInfoStriped) block).getRealTotalBlockNum(); + } else { + return defaultReplication; + } } public short getMinStorageNum(BlockInfo block) { if (block.isStriped()) { - return getStripedDataBlockNum(block); + return ((BlockInfoStriped) block).getRealDataBlockNum(); } else { return minReplication; } @@ -707,7 +710,7 @@ private BlockInfo completeBlock(final BlockCollection bc, // OP_CLOSE edit on the standby). namesystem.adjustSafeModeBlockTotals(0, 1); final int minStorage = curBlock.isStriped() ? - ((BlockInfoStriped) curBlock).getDataBlockNum() : minReplication; + ((BlockInfoStriped) curBlock).getRealDataBlockNum() : minReplication; namesystem.incrementSafeBlockCount( Math.min(numNodes, minStorage), curBlock); @@ -3854,27 +3857,12 @@ boolean isNeededReplication(BlockInfo storedBlock, int expected, int current) { public short getExpectedReplicaNum(BlockCollection bc, BlockInfo block) { if (block.isStriped()) { - return (short) (getStripedDataBlockNum(block) + - ((BlockInfoStriped) block).getParityBlockNum()); + return ((BlockInfoStriped) block).getRealTotalBlockNum(); } else { return bc.getPreferredBlockReplication(); } } - - short getStripedDataBlockNum(BlockInfo block) { - assert block.isStriped(); - final BlockInfoStriped sblock = (BlockInfoStriped) block; - short dataBlockNum = sblock.getDataBlockNum(); - if (sblock.isComplete() || - sblock.getBlockUCState() == BlockUCState.COMMITTED) { - // if the sblock is committed/completed and its length is less than a - // full stripe, the minimum storage number needs to be adjusted - dataBlockNum = (short) Math.min(dataBlockNum, - (sblock.getNumBytes() - 1) / BLOCK_STRIPED_CELL_SIZE + 1); - } - return dataBlockNum; - } - + public long getMissingBlocksCount() { // not locking return this.neededReplications.getCorruptBlockSize();
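Taken together with the FSNamesystem hunk below, the effect of these BlockManager changes is that a striped block group now counts toward the safemode safe-block total as soon as its reported storages reach the real data-block count, rather than the schema's full data-unit count. A minimal sketch of that counting rule (standalone and hypothetical; the names are illustrative, not the actual FSNamesystem members):

public class SafeCountSketch {
  static int blockSafe = 0;

  // minSafe stands in for getRealDataBlockNum() on striped block groups and
  // for safeReplication on contiguous blocks.
  static void incrementSafeBlockCount(int reportedStorages, int minSafe) {
    // count a block exactly once: when its reports first reach the minimum
    if (reportedStorages == minSafe) {
      blockSafe++;
    }
  }

  public static void main(String[] args) {
    incrementSafeBlockCount(1, 1); // a one-cell block group is safe after one report
    incrementSafeBlockCount(1, 6); // a full stripe still needs six reports
    System.out.println(blockSafe); // prints 1
  }
}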
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 1dd0a432d6..e5483b6249 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -4878,7 +4878,7 @@ private synchronized void setBlockTotal(int total) { private synchronized void incrementSafeBlockCount(short storageNum, BlockInfo storedBlock) { final int safe = storedBlock.isStriped() ? - ((BlockInfoStriped) storedBlock).getDataBlockNum() : safeReplication; + ((BlockInfoStriped) storedBlock).getRealDataBlockNum() : safeReplication; if (storageNum == safe) { this.blockSafe++;

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java index a43e37173f..1ea89515de 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java @@ -553,17 +553,6 @@ public void testSafeModeWhenZeroBlockLocations() throws IOException { } } - //TODO : test should be added to check safeMode with stripedBloks after stripedBlock related functions have been added in class MiniDFSCluster - @Test - public void testSafeModeWithCorruptSripedBlock() throws IOException { - try { - - } finally { - if(fs != null) fs.close(); - if(cluster!= null) cluster.shutdown(); - } - } - void checkGetBlockLocationsWorks(FileSystem fs, Path fileName) throws IOException { FileStatus stat = fs.getFileStatus(fileName); try {

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeModeWithStripedFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeModeWithStripedFile.java new file mode 100644 index 0000000000..6f0bc71ad5 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeModeWithStripedFile.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hdfs; + +import com.google.common.collect.Lists; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class TestSafeModeWithStripedFile { + + static final short DATA_BLK_NUM = HdfsConstants.NUM_DATA_BLOCKS; + static final short PARITY_BLK_NUM = HdfsConstants.NUM_PARITY_BLOCKS; + static final int numDNs = DATA_BLK_NUM + PARITY_BLK_NUM; + static final int cellSize = HdfsConstants.BLOCK_STRIPED_CELL_SIZE; + static final int blockSize = cellSize * 2; + + static MiniDFSCluster cluster; + static Configuration conf; + + @Before + public void setup() throws IOException { + conf = new HdfsConfiguration(); + conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build(); + cluster.getFileSystem().getClient().createErasureCodingZone("/", + null, cellSize); + cluster.waitActive(); + + } + + @After + public void tearDown() throws IOException { + if (cluster != null) { + cluster.shutdown(); + } + } + + @Test + public void testStripedFile0() throws IOException { + doTest(cellSize, 1); + } + + @Test + public void testStripedFile1() throws IOException { + doTest(cellSize * 5, 5); + } + + /** + * This utility writes a small block group whose size is given by the + * caller, then writes another 2 full block groups. It then shuts down + * all DNs and restarts them one by one, verifying the safemode status + * after each restart. + * + * @param smallSize file size of the small block group + * @param minStorages minimum number of storages the small block group + * needs before it is counted as safe + */ + private void doTest(int smallSize, int minStorages) throws IOException { + FileSystem fs = cluster.getFileSystem(); + // write 1 small block group + byte[] data = StripedFileTestUtil.generateBytes(smallSize); + Path smallFilePath = new Path("/testStripedFile_" + smallSize); + DFSTestUtil.writeFile(fs, smallFilePath, data); + + // If we only have 1 block group, the NN won't enter safemode in the + // first place, because the computed block threshold rounds down to 0. + // So we need to add another 2 block groups. + int bigSize = blockSize * DATA_BLK_NUM * 2; + Path bigFilePath = new Path("/testStripedFile_" + bigSize); + data = StripedFileTestUtil.generateBytes(bigSize); + DFSTestUtil.writeFile(fs, bigFilePath, data); + // Now we have 3 block groups in total; the NN needs 2 of them reported + // safe before it can leave safemode.
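// For reference (an assumption about the surrounding configuration, not
// something this patch sets): with the default
// dfs.namenode.safemode.threshold-pct of 0.999f, the NameNode computes
// blockThreshold = (int) (blockTotal * threshold), so for the 3 block
// groups here: (int) (3 * 0.999) = (int) 2.997 = 2 safe block groups.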
+ + // stop all DNs + List dnprops = Lists.newArrayList(); + LocatedBlocks lbs = cluster.getNameNodeRpc() + .getBlockLocations(smallFilePath.toString(), 0, smallSize); + DatanodeInfo[] locations = lbs.get(0).getLocations(); + for (DatanodeInfo loc : locations) { + // keep the DNs that hold smallFile's blocks at the head of dnprops + dnprops.add(cluster.stopDataNode(loc.getName())); + } + for (int i = 0; i < numDNs - locations.length; i++) { + dnprops.add(cluster.stopDataNode(0)); + } + + cluster.restartNameNode(0); + NameNode nn = cluster.getNameNode(); + assertTrue(cluster.getNameNode().isInSafeMode()); + assertEquals(0, NameNodeAdapter.getSafeModeSafeBlocks(nn)); + + // smallFile's block group hasn't reached minStorages yet, + // so the safe block count doesn't increment. + for (int i = 0; i < minStorages - 1; i++) { + cluster.restartDataNode(dnprops.remove(0)); + cluster.triggerBlockReports(); + assertEquals(0, NameNodeAdapter.getSafeModeSafeBlocks(nn)); + } + + // smallFile's block group now reaches minStorages, + // so the safe block count increments. + cluster.restartDataNode(dnprops.remove(0)); + cluster.triggerBlockReports(); + assertEquals(1, NameNodeAdapter.getSafeModeSafeBlocks(nn)); + + // the 2 block groups of bigFile each need DATA_BLK_NUM storages to be safe + for (int i = minStorages; i < DATA_BLK_NUM - 1; i++) { + cluster.restartDataNode(dnprops.remove(0)); + cluster.triggerBlockReports(); + assertTrue(nn.isInSafeMode()); + } + + cluster.restartDataNode(dnprops.remove(0)); + cluster.triggerBlockReports(); + assertFalse(nn.isInSafeMode()); + } + +}
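To run just this new test from the hadoop-hdfs module, the standard Maven Surefire invocation should work (assuming a normal build environment for this branch; exact profiles may vary):

mvn test -Dtest=TestSafeModeWithStripedFile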