diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index e74e6d91db..d3ba1b5a21 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -648,6 +648,9 @@ Release 2.6.1 - UNRELEASED
     HDFS-7503. Namenode restart after large deletions can cause slow
     processReport. (Arpit Agarwal)
 
+    HDFS-7443. Datanode upgrade to BLOCKID_BASED_LAYOUT fails if duplicate
+    block files are present in the same volume (cmccabe)
+
 Release 2.6.0 - 2014-11-18
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java
index 15e7f552bd..3ea8ce36df 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hdfs.server.datanode;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.ComparisonChain;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.util.concurrent.Futures;
@@ -57,13 +58,16 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
+import java.util.TreeMap;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ExecutorService;
@@ -979,10 +983,10 @@ private void linkAllBlocks(DataNode datanode, File fromDir, File fromBbwDir,
   }
 
   private static class LinkArgs {
-    public File src;
-    public File dst;
+    File src;
+    File dst;
 
-    public LinkArgs(File src, File dst) {
+    LinkArgs(File src, File dst) {
       this.src = src;
       this.dst = dst;
     }
@@ -999,9 +1003,19 @@ static void linkBlocks(DataNode datanode, File from, File to, int oldLV,
       upgradeToIdBasedLayout = true;
     }
 
-    final List<LinkArgs> idBasedLayoutSingleLinks = Lists.newArrayList();
+    final ArrayList<LinkArgs> idBasedLayoutSingleLinks = Lists.newArrayList();
     linkBlocksHelper(from, to, oldLV, hl, upgradeToIdBasedLayout,
         to, idBasedLayoutSingleLinks);
+
+    // Detect and remove duplicate entries.
+    final ArrayList<LinkArgs> duplicates =
+        findDuplicateEntries(idBasedLayoutSingleLinks);
+    if (!duplicates.isEmpty()) {
+      LOG.error("There are " + duplicates.size() + " duplicate block " +
+          "entries within the same volume.");
+      removeDuplicateEntries(idBasedLayoutSingleLinks, duplicates);
+    }
+
     int numLinkWorkers = datanode.getConf().getInt(
         DFSConfigKeys.DFS_DATANODE_BLOCK_ID_LAYOUT_UPGRADE_THREADS_KEY,
         DFSConfigKeys.DFS_DATANODE_BLOCK_ID_LAYOUT_UPGRADE_THREADS);
@@ -1028,7 +1042,162 @@ public Void call() throws IOException {
       Futures.get(f, IOException.class);
     }
   }
-
+
+  /**
+   * Find duplicate entries with an array of LinkArgs.
+   * Duplicate entries are entries with the same last path component.
+   */
+  static ArrayList<LinkArgs> findDuplicateEntries(ArrayList<LinkArgs> all) {
+    // Find duplicates by sorting the list by the final path component.
+    Collections.sort(all, new Comparator<LinkArgs>() {
+      /**
+       * Compare two LinkArgs objects, such that objects with the same
+       * terminal source path components are grouped together.
+       */
+      @Override
+      public int compare(LinkArgs a, LinkArgs b) {
+        return ComparisonChain.start().
+            compare(a.src.getName(), b.src.getName()).
+            compare(a.src, b.src).
+            compare(a.dst, b.dst).
+            result();
+      }
+    });
+    final ArrayList<LinkArgs> duplicates = Lists.newArrayList();
+    Long prevBlockId = null;
+    boolean prevWasMeta = false;
+    boolean addedPrev = false;
+    for (int i = 0; i < all.size(); i++) {
+      LinkArgs args = all.get(i);
+      long blockId = Block.getBlockId(args.src.getName());
+      boolean isMeta = Block.isMetaFilename(args.src.getName());
+      if ((prevBlockId == null) ||
+          (prevBlockId.longValue() != blockId)) {
+        prevBlockId = blockId;
+        addedPrev = false;
+      } else if (isMeta == prevWasMeta) {
+        // If we saw another file for the same block ID previously,
+        // and it had the same meta-ness as this file, we have a
+        // duplicate.
+        duplicates.add(args);
+        if (!addedPrev) {
+          duplicates.add(all.get(i - 1));
+        }
+        addedPrev = true;
+      } else {
+        addedPrev = false;
+      }
+      prevWasMeta = isMeta;
+    }
+    return duplicates;
+  }
+
+  /**
+   * Remove duplicate entries from the list.
+   * We do this by choosing:
+   * 1. the entries with the highest genstamp (this takes priority),
+   * 2. the entries with the longest block files,
+   * 3. arbitrarily, if neither #1 nor #2 gives a clear winner.
+   *
+   * Block and metadata files form a pair-- if you take a metadata file from
+   * one subdirectory, you must also take the block file from that
+   * subdirectory.
+   */
+  private static void removeDuplicateEntries(ArrayList<LinkArgs> all,
+      ArrayList<LinkArgs> duplicates) {
+    // Maps blockId -> metadata file with highest genstamp
+    TreeMap<Long, List<LinkArgs>> highestGenstamps =
+        new TreeMap<Long, List<LinkArgs>>();
+    for (LinkArgs duplicate : duplicates) {
+      if (!Block.isMetaFilename(duplicate.src.getName())) {
+        continue;
+      }
+      long blockId = Block.getBlockId(duplicate.src.getName());
+      List<LinkArgs> prevHighest = highestGenstamps.get(blockId);
+      if (prevHighest == null) {
+        List<LinkArgs> highest = new LinkedList<LinkArgs>();
+        highest.add(duplicate);
+        highestGenstamps.put(blockId, highest);
+        continue;
+      }
+      long prevGenstamp =
+          Block.getGenerationStamp(prevHighest.get(0).src.getName());
+      long genstamp = Block.getGenerationStamp(duplicate.src.getName());
+      if (genstamp < prevGenstamp) {
+        continue;
+      }
+      if (genstamp > prevGenstamp) {
+        prevHighest.clear();
+      }
+      prevHighest.add(duplicate);
+    }
+
+    // Remove data / metadata entries that don't have the highest genstamp
+    // from the duplicates list.
+    for (Iterator<LinkArgs> iter = duplicates.iterator(); iter.hasNext(); ) {
+      LinkArgs duplicate = iter.next();
+      long blockId = Block.getBlockId(duplicate.src.getName());
+      List<LinkArgs> highest = highestGenstamps.get(blockId);
+      if (highest != null) {
+        boolean found = false;
+        for (LinkArgs high : highest) {
+          if (high.src.getParent().equals(duplicate.src.getParent())) {
+            found = true;
+            break;
+          }
+        }
+        if (!found) {
+          LOG.warn("Unexpectedly low genstamp on " +
+              duplicate.src.getAbsolutePath() + ".");
+          iter.remove();
+        }
+      }
+    }
+
+    // Find the longest block files
+    // We let the "last guy win" here, since we're only interested in
+    // preserving one block file / metadata file pair.
+    TreeMap<Long, LinkArgs> longestBlockFiles = new TreeMap<Long, LinkArgs>();
+    for (LinkArgs duplicate : duplicates) {
+      if (Block.isMetaFilename(duplicate.src.getName())) {
+        continue;
+      }
+      long blockId = Block.getBlockId(duplicate.src.getName());
+      LinkArgs prevLongest = longestBlockFiles.get(blockId);
+      if (prevLongest == null) {
+        longestBlockFiles.put(blockId, duplicate);
+        continue;
+      }
+      long blockLength = duplicate.src.length();
+      long prevBlockLength = prevLongest.src.length();
+      if (blockLength < prevBlockLength) {
+        LOG.warn("Unexpectedly short length on " +
+            duplicate.src.getAbsolutePath() + ".");
+        continue;
+      }
+      if (blockLength > prevBlockLength) {
+        LOG.warn("Unexpectedly short length on " +
+            prevLongest.src.getAbsolutePath() + ".");
+      }
+      longestBlockFiles.put(blockId, duplicate);
+    }
+
+    // Remove data / metadata entries that aren't the longest, or weren't
+    // arbitrarily selected by us.
+    for (Iterator<LinkArgs> iter = all.iterator(); iter.hasNext(); ) {
+      LinkArgs args = iter.next();
+      long blockId = Block.getBlockId(args.src.getName());
+      LinkArgs bestDuplicate = longestBlockFiles.get(blockId);
+      if (bestDuplicate == null) {
+        continue; // file has no duplicates
+      }
+      if (!bestDuplicate.src.getParent().equals(args.src.getParent())) {
+        LOG.warn("Discarding " + args.src.getAbsolutePath() + ".");
+        iter.remove();
+      }
+    }
+  }
+
   static void linkBlocksHelper(File from, File to, int oldLV, HardLink hl,
   boolean upgradeToIdBasedLayout, File blockRoot,
   List<LinkArgs> idBasedLayoutSingleLinks) throws IOException {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-24-datanode-dir.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-24-datanode-dir.tgz
index 9f666fed09..032bf0d3d6 100644
Binary files a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-24-datanode-dir.tgz and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-24-datanode-dir.tgz differ
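Note (not part of the patch): removeDuplicateEntries() resolves duplicates by preferring the copy with the highest generation stamp, then the longest block file, and otherwise choosing arbitrarily, always keeping the block file and meta file from the same subdirectory. The standalone sketch below illustrates that preference order under simplified assumptions; the class name, file paths, and the genstamp parser are hypothetical stand-ins for the Block helpers used in DataStorage.java.

import java.io.File;

/**
 * Illustrative sketch only: mimics the selection policy applied when one
 * block ends up duplicated across subdirectories of a single volume.
 */
public class DuplicatePickerSketch {
  /**
   * Parse the generation stamp out of a meta file name such as
   * "blk_1073741825_1005.meta" (simplified stand-in for
   * Block.getGenerationStamp()).
   */
  static long genstamp(File meta) {
    String[] parts = meta.getName().replace(".meta", "").split("_");
    return Long.parseLong(parts[2]);
  }

  /** Rule 1: between two duplicate meta files, the higher genstamp wins. */
  static File pickMeta(File a, File b) {
    return genstamp(a) >= genstamp(b) ? a : b;
  }

  /**
   * Rule 2: between two duplicate block files with equal genstamps, the
   * longer file wins; rule 3: ties are broken arbitrarily.
   */
  static File pickBlock(File a, File b) {
    return a.length() >= b.length() ? a : b;
  }

  public static void main(String[] args) {
    // Two hypothetical duplicate meta files for the same block ID.
    File oldMeta = new File("finalized/subdir12/blk_1073741825_1001.meta");
    File newMeta = new File("finalized/subdir37/blk_1073741825_1005.meta");
    File keep = pickMeta(oldMeta, newMeta);
    // Prints the subdir37 copy; the matching block file would then be taken
    // from the same subdirectory, since block and meta files form a pair.
    System.out.println("keeping " + keep.getPath());
  }
}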