HDDS-475. Block Allocation returns same BlockID on different keys creation.

Contributed by Nanda Kumar.
Committed by: Anu Engineer
Date: 2018-09-17 14:08:39 -07:00
Parent: 26d0c63a1e
Commit: 23a6137a40

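Background on the fix: the block-local ID was previously taken straight from Time.getUtcTime(), which has millisecond granularity, so two allocations landing in the same millisecond received the same local ID and therefore the same BlockID. The patch replaces the raw timestamp with UniqueId.next(), which appends a 16-bit counter to the timestamp (see the new UniqueId class at the end of the diff), and also removes the now-unused ReadWriteLock from BlockManagerImpl. A minimal, illustrative sketch of the underlying collision; MillisCollisionDemo is hypothetical and uses System.currentTimeMillis() as a stand-in for Time.getUtcTime():

import java.util.HashSet;
import java.util.Set;

// Illustrative only: a raw millisecond clock is not a source of unique ids.
public class MillisCollisionDemo {
  public static void main(String[] args) {
    Set<Long> distinct = new HashSet<>();
    int calls = 10_000;
    for (int i = 0; i < calls; i++) {
      distinct.add(System.currentTimeMillis());
    }
    // Prints far fewer distinct values than calls: many calls share a
    // millisecond, which is how two keys could receive the same BlockID.
    System.out.println(calls + " calls -> " + distinct.size() + " distinct");
  }
}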

@@ -48,8 +48,6 @@
 import java.util.Map;
 import java.util.Random;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.locks.ReadWriteLock;
-import java.util.concurrent.locks.ReentrantReadWriteLock;
 import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes
     .CHILL_MODE_EXCEPTION;
@@ -76,7 +74,6 @@ public class BlockManagerImpl implements EventHandler<Boolean>,
   private final NodeManager nodeManager;
   private final Mapping containerManager;
-  private final ReadWriteLock lock;
   private final long containerSize;
   private final DeletedBlockLog deletedBlockLog;
@@ -113,7 +110,6 @@ public BlockManagerImpl(final Configuration conf,
         ScmConfigKeys.OZONE_SCM_CONTAINER_PROVISION_BATCH_SIZE,
         ScmConfigKeys.OZONE_SCM_CONTAINER_PROVISION_BATCH_SIZE_DEFAULT);
     rand = new Random();
-    this.lock = new ReentrantReadWriteLock();
     mxBean = MBeans.register("BlockManager", "BlockManagerImpl", this);
@@ -223,74 +219,29 @@ public AllocatedBlock allocateBlock(final long size,
     ContainerWithPipeline containerWithPipeline;
-    lock.readLock().lock();
-    try {
-      // This is to optimize performance, if the below condition is evaluated
-      // to false, then we can be sure that there are no containers in
-      // ALLOCATED state.
-      // This can result in false positive, but it will never be false negative.
-      // How can this result in false positive? We check if there are any
-      // containers in ALLOCATED state, this check doesn't care about the
-      // USER of the containers. So there might be cases where a different
-      // USER has few containers in ALLOCATED state, which will result in
-      // false positive.
-      if (!containerManager.getStateManager().getContainerStateMap()
-          .getContainerIDsByState(HddsProtos.LifeCycleState.ALLOCATED)
-          .isEmpty()) {
-        // Since the above check can result in false positive, we have to do
-        // the actual check and find out if there are containers in ALLOCATED
-        // state matching our criteria.
-        synchronized (this) {
-          // Using containers from ALLOCATED state should be done within
-          // synchronized block (or) write lock. Since we already hold a
-          // read lock, we will end up in deadlock situation if we take
-          // write lock here.
-          containerWithPipeline = containerManager
-              .getMatchingContainerWithPipeline(size, owner, type, factor,
-                  HddsProtos.LifeCycleState.ALLOCATED);
-          if (containerWithPipeline != null) {
-            containerManager.updateContainerState(
-                containerWithPipeline.getContainerInfo().getContainerID(),
-                HddsProtos.LifeCycleEvent.CREATE);
-            return newBlock(containerWithPipeline,
-                HddsProtos.LifeCycleState.ALLOCATED);
-          }
-        }
-      }
-
-      // Since we found no allocated containers that match our criteria, let us
-      // look for OPEN containers that match the criteria.
-      containerWithPipeline = containerManager
-          .getMatchingContainerWithPipeline(size, owner, type, factor,
-              HddsProtos.LifeCycleState.OPEN);
-      if (containerWithPipeline != null) {
-        return newBlock(containerWithPipeline, HddsProtos.LifeCycleState.OPEN);
-      }
-
-      // We found neither ALLOCATED or OPEN Containers. This generally means
-      // that most of our containers are full or we have not allocated
-      // containers of the type and replication factor. So let us go and
-      // allocate some.
-
-      // Even though we have already checked the containers in ALLOCATED
-      // state, we have to check again as we only hold a read lock.
-      // Some other thread might have pre-allocated container in meantime.
-      synchronized (this) {
-        if (!containerManager.getStateManager().getContainerStateMap()
-            .getContainerIDsByState(HddsProtos.LifeCycleState.ALLOCATED)
-            .isEmpty()) {
-          containerWithPipeline = containerManager
-              .getMatchingContainerWithPipeline(size, owner, type, factor,
-                  HddsProtos.LifeCycleState.ALLOCATED);
-        }
-
-        if (containerWithPipeline == null) {
-          preAllocateContainers(containerProvisionBatchSize,
-              type, factor, owner);
-          containerWithPipeline = containerManager
-              .getMatchingContainerWithPipeline(size, owner, type, factor,
-                  HddsProtos.LifeCycleState.ALLOCATED);
-        }
-
+
+    // This is to optimize performance, if the below condition is evaluated
+    // to false, then we can be sure that there are no containers in
+    // ALLOCATED state.
+    // This can result in false positive, but it will never be false negative.
+    // How can this result in false positive? We check if there are any
+    // containers in ALLOCATED state, this check doesn't care about the
+    // USER of the containers. So there might be cases where a different
+    // USER has few containers in ALLOCATED state, which will result in
+    // false positive.
+    if (!containerManager.getStateManager().getContainerStateMap()
+        .getContainerIDsByState(HddsProtos.LifeCycleState.ALLOCATED)
+        .isEmpty()) {
+      // Since the above check can result in false positive, we have to do
+      // the actual check and find out if there are containers in ALLOCATED
+      // state matching our criteria.
+      synchronized (this) {
+        // Using containers from ALLOCATED state should be done within
+        // synchronized block (or) write lock. Since we already hold a
+        // read lock, we will end up in deadlock situation if we take
+        // write lock here.
+        containerWithPipeline = containerManager
+            .getMatchingContainerWithPipeline(size, owner, type, factor,
+                HddsProtos.LifeCycleState.ALLOCATED);
         if (containerWithPipeline != null) {
           containerManager.updateContainerState(
               containerWithPipeline.getContainerInfo().getContainerID(),
@@ -299,19 +250,55 @@ public AllocatedBlock allocateBlock(final long size,
               HddsProtos.LifeCycleEvent.CREATE);
           return newBlock(containerWithPipeline,
               HddsProtos.LifeCycleState.ALLOCATED);
         }
       }
-
-      // we have tried all strategies we know and but somehow we are not able
-      // to get a container for this block. Log that info and return a null.
-      LOG.error(
-          "Unable to allocate a block for the size: {}, type: {}, " +
-              "factor: {}",
-          size,
-          type,
-          factor);
-      return null;
-    } finally {
-      lock.readLock().unlock();
     }
+
+    // Since we found no allocated containers that match our criteria, let us
+    // look for OPEN containers that match the criteria.
+    containerWithPipeline = containerManager
+        .getMatchingContainerWithPipeline(size, owner, type, factor,
+            HddsProtos.LifeCycleState.OPEN);
+    if (containerWithPipeline != null) {
+      return newBlock(containerWithPipeline, HddsProtos.LifeCycleState.OPEN);
+    }
+
+    // We found neither ALLOCATED or OPEN Containers. This generally means
+    // that most of our containers are full or we have not allocated
+    // containers of the type and replication factor. So let us go and
+    // allocate some.
+
+    // Even though we have already checked the containers in ALLOCATED
+    // state, we have to check again as we only hold a read lock.
+    // Some other thread might have pre-allocated container in meantime.
+    synchronized (this) {
+      if (!containerManager.getStateManager().getContainerStateMap()
+          .getContainerIDsByState(HddsProtos.LifeCycleState.ALLOCATED)
+          .isEmpty()) {
+        containerWithPipeline = containerManager
+            .getMatchingContainerWithPipeline(size, owner, type, factor,
+                HddsProtos.LifeCycleState.ALLOCATED);
+      }
+
+      if (containerWithPipeline == null) {
+        preAllocateContainers(containerProvisionBatchSize,
+            type, factor, owner);
+        containerWithPipeline = containerManager
+            .getMatchingContainerWithPipeline(size, owner, type, factor,
+                HddsProtos.LifeCycleState.ALLOCATED);
+      }
+
+      if (containerWithPipeline != null) {
+        containerManager.updateContainerState(
+            containerWithPipeline.getContainerInfo().getContainerID(),
+            HddsProtos.LifeCycleEvent.CREATE);
+        return newBlock(containerWithPipeline,
+            HddsProtos.LifeCycleState.ALLOCATED);
+      }
+    }
+
+    // we have tried all strategies we know and but somehow we are not able
+    // to get a container for this block. Log that info and return a null.
+    LOG.error(
+        "Unable to allocate a block for the size: {}, type: {}, factor: {}",
+        size, type, factor);
+    return null;
   }

   /**
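The rewritten allocateBlock keeps the same two-phase shape as before: a cheap containment check that may yield false positives runs outside any lock, and the authoritative re-check plus the state transition happen inside synchronized (this). A minimal sketch of that check-then-act pattern under hypothetical names (OptimisticPool, take and add are illustrative, not from the patch):

import java.util.ArrayDeque;
import java.util.Deque;

// Sketch of the optimistic-check / synchronized-recheck pattern used above.
class OptimisticPool {
  private final Deque<Long> preAllocated = new ArrayDeque<>();
  // Stale reads of this gate are tolerable: they cost at most a wasted lock
  // acquisition or a fall-through to the next allocation strategy.
  private volatile int approxSize = 0;

  Long take() {
    if (approxSize > 0) {          // cheap check, may be a false positive
      synchronized (this) {        // authoritative check-and-remove
        Long id = preAllocated.pollFirst();
        if (id != null) {
          approxSize--;
          return id;
        }
      }
    }
    return null;                   // caller falls back, as allocateBlock does
  }

  synchronized void add(long id) {
    preAllocated.addLast(id);
    approxSize++;
  }
}

The gate may momentarily be stale in either direction; correctness never depends on it, because every mutation is re-validated under the lock, and a missed fast path only means falling through to the slower strategy.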
@@ -330,9 +317,7 @@ private AllocatedBlock newBlock(ContainerWithPipeline containerWithPipeline,
     }

     // TODO : Revisit this local ID allocation when HA is added.
-    // TODO: this does not work well if multiple allocation kicks in a tight
-    // loop.
-    long localID = Time.getUtcTime();
+    long localID = UniqueId.next();
     long containerID = containerInfo.getContainerID();
     boolean createContainer = (state == HddsProtos.LifeCycleState.ALLOCATED);
@@ -463,4 +448,51 @@ public boolean isScmInChillMode() {
   public static Logger getLogger() {
     return LOG;
   }
+
+  /**
+   * This class uses system current time milliseconds to generate unique id.
+   */
+  public static final class UniqueId {
+    /*
+     * When we represent time in milliseconds using 'long' data type,
+     * the LSB bits are used. Currently we are only using 44 bits (LSB),
+     * 20 bits (MSB) are not used.
+     * We will exhaust this 44 bits only when we are in year 2525,
+     * until then we can safely use this 20 bits (MSB) for offset to generate
+     * unique id within millisecond.
+     *
+     * Year : Mon Dec 31 18:49:04 IST 2525
+     * TimeInMillis: 17545641544247
+     * Binary Representation:
+     *   MSB (20 bits): 0000 0000 0000 0000 0000
+     *   LSB (44 bits): 1111 1111 0101 0010 1001 1011 1011 0100 1010 0011 0111
+     *
+     * We have 20 bits to run counter, we should exclude the first bit (MSB)
+     * as we don't want to deal with negative values.
+     * To be on safer side we will use 'short' data type which is of length
+     * 16 bits and will give us 65,536 values for offset.
+     *
+     */
+
+    private static volatile short offset = 0;
+
+    /**
+     * Private constructor so that no one can instantiate this class.
+     */
+    private UniqueId() {}
+
+    /**
+     * Calculate and returns next unique id based on System#currentTimeMillis.
+     *
+     * @return unique long value
+     */
+    public static synchronized long next() {
+      long utcTime = Time.getUtcTime();
+      if ((utcTime & 0xFFFF000000000000L) == 0) {
+        return utcTime << Short.SIZE | (offset++ & 0x0000FFFF);
+      }
+      throw new RuntimeException("Got invalid UTC time," +
+          " cannot generate unique Id. UTC Time: " + utcTime);
+    }
+  }
 }
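For reference, next() packs the millisecond timestamp (currently 44 significant bits) into the upper bits of the long and the 16-bit offset into the lower bits, i.e. id = (utcMillis << 16) | counter, so up to 65,536 distinct ids can be issued within a single millisecond. A small driver showing the intended behaviour; UniqueIdDemo is hypothetical and mimics the class above, with System.currentTimeMillis() standing in for Time.getUtcTime():

import java.util.HashSet;
import java.util.Set;

// Hypothetical driver mimicking UniqueId: id = (utcMillis << 16) | counter.
public class UniqueIdDemo {
  private static short offset = 0;

  public static synchronized long next() {
    long utcMillis = System.currentTimeMillis();
    if ((utcMillis & 0xFFFF000000000000L) == 0) {
      return utcMillis << Short.SIZE | (offset++ & 0x0000FFFF);
    }
    throw new RuntimeException("UTC time too large: " + utcMillis);
  }

  public static void main(String[] args) {
    Set<Long> seen = new HashSet<>();
    for (int i = 0; i < 10_000; i++) {
      // Uniqueness holds as long as no single millisecond issues more than
      // 65,536 ids, i.e. before the 16-bit counter can wrap within one tick.
      if (!seen.add(next())) {
        throw new AssertionError("duplicate id");
      }
    }
    // The originating millisecond is recoverable by shifting the counter out.
    System.out.println("timestamp(ms) = " + (next() >>> Short.SIZE));
  }
}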