From 300f560fcca879d212eadffaa59fbbb7017c9a3f Mon Sep 17 00:00:00 2001 From: Giovanni Matteo Fumarola Date: Wed, 28 Nov 2018 11:33:22 -0800 Subject: [PATCH] HDFS-14102. Performance improvement in BlockPlacementPolicyDefault. Contributed by Beluga Behr. --- .../BlockPlacementPolicyDefault.java | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index d396845d48..5b8a8b419f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -71,6 +71,9 @@ protected StringBuilder initialValue() { CHOOSE_RANDOM_REASONS = ThreadLocal .withInitial(() -> new HashMap()); + private static final BlockPlacementStatus ONE_RACK_PLACEMENT = + new BlockPlacementStatusDefault(1, 1, 1); + private enum NodeNotChosenReason { NOT_IN_SERVICE("the node is not in service"), NODE_STALE("the node is stale"), @@ -1029,22 +1032,23 @@ private DatanodeStorageInfo[] getPipeline(Node writer, @Override public BlockPlacementStatus verifyBlockPlacement(DatanodeInfo[] locs, int numberOfReplicas) { - if (locs == null) + if (locs == null) { locs = DatanodeDescriptor.EMPTY_ARRAY; + } if (!clusterMap.hasClusterEverBeenMultiRack()) { // only one rack - return new BlockPlacementStatusDefault(1, 1, 1); + return ONE_RACK_PLACEMENT; } - int minRacks = 2; - minRacks = Math.min(minRacks, numberOfReplicas); + final int minRacks = Math.min(2, numberOfReplicas); // 1. Check that all locations are different. // 2. Count locations on different racks. - Set racks = new TreeSet<>(); - for (DatanodeInfo dn : locs) - racks.add(dn.getNetworkLocation()); - return new BlockPlacementStatusDefault(racks.size(), minRacks, - clusterMap.getNumOfRacks()); + final long rackCount = Arrays.asList(locs).stream() + .map(dn -> dn.getNetworkLocation()).distinct().count(); + + return new BlockPlacementStatusDefault(Math.toIntExact(rackCount), + minRacks, clusterMap.getNumOfRacks()); } + /** * Decide whether deleting the specified replica of the block still makes * the block conform to the configured block placement policy.