HDDS-475. Block Allocation returns same BlockID on different keys creation.

Contributed by Nanda Kumar.
Committed by: Anu Engineer
Date: 2018-09-17 14:08:39 -07:00
Parent: 26d0c63a1e
Commit: 23a6137a40

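Background on the fix: the block-local ID was previously taken straight from Time.getUtcTime(), which has millisecond granularity, so two allocations landing in the same millisecond received the same local ID and therefore the same BlockID. The patch replaces the raw timestamp with UniqueId.next(), which appends a 16-bit counter to the timestamp (see the new UniqueId class at the end of the diff), and also removes the now-unused ReadWriteLock from BlockManagerImpl. A minimal, illustrative sketch of the underlying collision; MillisCollisionDemo is hypothetical and uses System.currentTimeMillis() as a stand-in for Time.getUtcTime():

import java.util.HashSet;
import java.util.Set;

// Illustrative only: a raw millisecond clock is not a source of unique ids.
public class MillisCollisionDemo {
  public static void main(String[] args) {
    Set<Long> distinct = new HashSet<>();
    int calls = 10_000;
    for (int i = 0; i < calls; i++) {
      distinct.add(System.currentTimeMillis());
    }
    // Prints far fewer distinct values than calls: many calls share a
    // millisecond, which is how two keys could receive the same BlockID.
    System.out.println(calls + " calls -> " + distinct.size() + " distinct");
  }
}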

@@ -48,8 +48,6 @@
 import java.util.Map;
 import java.util.Random;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.locks.ReadWriteLock;
-import java.util.concurrent.locks.ReentrantReadWriteLock;
 import static org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes
     .CHILL_MODE_EXCEPTION;
@@ -76,7 +74,6 @@ public class BlockManagerImpl implements EventHandler<Boolean>,
   private final NodeManager nodeManager;
   private final Mapping containerManager;
-  private final ReadWriteLock lock;
   private final long containerSize;
   private final DeletedBlockLog deletedBlockLog;
@@ -113,7 +110,6 @@ public BlockManagerImpl(final Configuration conf,
         ScmConfigKeys.OZONE_SCM_CONTAINER_PROVISION_BATCH_SIZE,
         ScmConfigKeys.OZONE_SCM_CONTAINER_PROVISION_BATCH_SIZE_DEFAULT);
     rand = new Random();
-    this.lock = new ReentrantReadWriteLock();
     mxBean = MBeans.register("BlockManager", "BlockManagerImpl", this);
@@ -223,74 +219,29 @@ public AllocatedBlock allocateBlock(final long size,
     ContainerWithPipeline containerWithPipeline;
-    lock.readLock().lock();
-    try {
-      // This is to optimize performance, if the below condition is evaluated
-      // to false, then we can be sure that there are no containers in
-      // ALLOCATED state.
-      // This can result in false positive, but it will never be false negative.
-      // How can this result in false positive? We check if there are any
-      // containers in ALLOCATED state, this check doesn't care about the
-      // USER of the containers. So there might be cases where a different
-      // USER has few containers in ALLOCATED state, which will result in
-      // false positive.
-      if (!containerManager.getStateManager().getContainerStateMap()
-          .getContainerIDsByState(HddsProtos.LifeCycleState.ALLOCATED)
-          .isEmpty()) {
-        // Since the above check can result in false positive, we have to do
-        // the actual check and find out if there are containers in ALLOCATED
-        // state matching our criteria.
-        synchronized (this) {
-          // Using containers from ALLOCATED state should be done within
-          // synchronized block (or) write lock. Since we already hold a
-          // read lock, we will end up in deadlock situation if we take
-          // write lock here.
-          containerWithPipeline = containerManager
-              .getMatchingContainerWithPipeline(size, owner, type, factor,
-                  HddsProtos.LifeCycleState.ALLOCATED);
-          if (containerWithPipeline != null) {
-            containerManager.updateContainerState(
-                containerWithPipeline.getContainerInfo().getContainerID(),
-                HddsProtos.LifeCycleEvent.CREATE);
-            return newBlock(containerWithPipeline,
-                HddsProtos.LifeCycleState.ALLOCATED);
-          }
-        }
-      }
-
-      // Since we found no allocated containers that match our criteria, let us
-      // look for OPEN containers that match the criteria.
-      containerWithPipeline = containerManager
-          .getMatchingContainerWithPipeline(size, owner, type, factor,
-              HddsProtos.LifeCycleState.OPEN);
-      if (containerWithPipeline != null) {
-        return newBlock(containerWithPipeline, HddsProtos.LifeCycleState.OPEN);
-      }
-
-      // We found neither ALLOCATED or OPEN Containers. This generally means
-      // that most of our containers are full or we have not allocated
-      // containers of the type and replication factor. So let us go and
-      // allocate some.
-
-      // Even though we have already checked the containers in ALLOCATED
-      // state, we have to check again as we only hold a read lock.
-      // Some other thread might have pre-allocated container in meantime.
-      synchronized (this) {
-        if (!containerManager.getStateManager().getContainerStateMap()
-            .getContainerIDsByState(HddsProtos.LifeCycleState.ALLOCATED)
-            .isEmpty()) {
-          containerWithPipeline = containerManager
-              .getMatchingContainerWithPipeline(size, owner, type, factor,
-                  HddsProtos.LifeCycleState.ALLOCATED);
-        }
-
-        if (containerWithPipeline == null) {
-          preAllocateContainers(containerProvisionBatchSize,
-              type, factor, owner);
-          containerWithPipeline = containerManager
-              .getMatchingContainerWithPipeline(size, owner, type, factor,
-                  HddsProtos.LifeCycleState.ALLOCATED);
-        }
-
+
+    // This is to optimize performance, if the below condition is evaluated
+    // to false, then we can be sure that there are no containers in
+    // ALLOCATED state.
+    // This can result in false positive, but it will never be false negative.
+    // How can this result in false positive? We check if there are any
+    // containers in ALLOCATED state, this check doesn't care about the
+    // USER of the containers. So there might be cases where a different
+    // USER has few containers in ALLOCATED state, which will result in
+    // false positive.
+    if (!containerManager.getStateManager().getContainerStateMap()
+        .getContainerIDsByState(HddsProtos.LifeCycleState.ALLOCATED)
+        .isEmpty()) {
+      // Since the above check can result in false positive, we have to do
+      // the actual check and find out if there are containers in ALLOCATED
+      // state matching our criteria.
+      synchronized (this) {
+        // Using containers from ALLOCATED state should be done within
+        // synchronized block (or) write lock. Since we already hold a
+        // read lock, we will end up in deadlock situation if we take
+        // write lock here.
+        containerWithPipeline = containerManager
+            .getMatchingContainerWithPipeline(size, owner, type, factor,
+                HddsProtos.LifeCycleState.ALLOCATED);
         if (containerWithPipeline != null) {
           containerManager.updateContainerState(
               containerWithPipeline.getContainerInfo().getContainerID(),
@@ -299,19 +250,55 @@ public AllocatedBlock allocateBlock(final long size,
               HddsProtos.LifeCycleEvent.CREATE);
           return newBlock(containerWithPipeline,
               HddsProtos.LifeCycleState.ALLOCATED);
         }
       }
-
-      // we have tried all strategies we know and but somehow we are not able
-      // to get a container for this block. Log that info and return a null.
-      LOG.error(
-          "Unable to allocate a block for the size: {}, type: {}, " +
-              "factor: {}",
-          size,
-          type,
-          factor);
-      return null;
-    } finally {
-      lock.readLock().unlock();
     }
+
+    // Since we found no allocated containers that match our criteria, let us
+    // look for OPEN containers that match the criteria.
+    containerWithPipeline = containerManager
+        .getMatchingContainerWithPipeline(size, owner, type, factor,
+            HddsProtos.LifeCycleState.OPEN);
+    if (containerWithPipeline != null) {
+      return newBlock(containerWithPipeline, HddsProtos.LifeCycleState.OPEN);
+    }
+
+    // We found neither ALLOCATED or OPEN Containers. This generally means
+    // that most of our containers are full or we have not allocated
+    // containers of the type and replication factor. So let us go and
+    // allocate some.
+
+    // Even though we have already checked the containers in ALLOCATED
+    // state, we have to check again as we only hold a read lock.
+    // Some other thread might have pre-allocated container in meantime.
+    synchronized (this) {
+      if (!containerManager.getStateManager().getContainerStateMap()
+          .getContainerIDsByState(HddsProtos.LifeCycleState.ALLOCATED)
+          .isEmpty()) {
+        containerWithPipeline = containerManager
+            .getMatchingContainerWithPipeline(size, owner, type, factor,
+                HddsProtos.LifeCycleState.ALLOCATED);
+      }
+
+      if (containerWithPipeline == null) {
+        preAllocateContainers(containerProvisionBatchSize,
+            type, factor, owner);
+        containerWithPipeline = containerManager
+            .getMatchingContainerWithPipeline(size, owner, type, factor,
+                HddsProtos.LifeCycleState.ALLOCATED);
+      }
+
+      if (containerWithPipeline != null) {
+        containerManager.updateContainerState(
+            containerWithPipeline.getContainerInfo().getContainerID(),
+            HddsProtos.LifeCycleEvent.CREATE);
+        return newBlock(containerWithPipeline,
+            HddsProtos.LifeCycleState.ALLOCATED);
+      }
+    }
+
+    // we have tried all strategies we know and but somehow we are not able
+    // to get a container for this block. Log that info and return a null.
+    LOG.error(
+        "Unable to allocate a block for the size: {}, type: {}, factor: {}",
+        size, type, factor);
+    return null;
   }

   /**
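The rewritten allocateBlock keeps the same two-phase shape as before: a cheap containment check that may yield false positives runs outside any lock, and the authoritative re-check plus the state transition happen inside synchronized (this). A minimal sketch of that check-then-act pattern under hypothetical names (OptimisticPool, take and add are illustrative, not from the patch):

import java.util.ArrayDeque;
import java.util.Deque;

// Sketch of the optimistic-check / synchronized-recheck pattern used above.
class OptimisticPool {
  private final Deque<Long> preAllocated = new ArrayDeque<>();
  // Stale reads of this gate are tolerable: they cost at most a wasted lock
  // acquisition or a fall-through to the next allocation strategy.
  private volatile int approxSize = 0;

  Long take() {
    if (approxSize > 0) {          // cheap check, may be a false positive
      synchronized (this) {        // authoritative check-and-remove
        Long id = preAllocated.pollFirst();
        if (id != null) {
          approxSize--;
          return id;
        }
      }
    }
    return null;                   // caller falls back, as allocateBlock does
  }

  synchronized void add(long id) {
    preAllocated.addLast(id);
    approxSize++;
  }
}

The gate may momentarily be stale in either direction; correctness never depends on it, because every mutation is re-validated under the lock, and a missed fast path only means falling through to the slower strategy.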
@@ -330,9 +317,7 @@ private AllocatedBlock newBlock(ContainerWithPipeline containerWithPipeline,
     }

     // TODO : Revisit this local ID allocation when HA is added.
-    // TODO: this does not work well if multiple allocation kicks in a tight
-    // loop.
-    long localID = Time.getUtcTime();
+    long localID = UniqueId.next();
     long containerID = containerInfo.getContainerID();
     boolean createContainer = (state == HddsProtos.LifeCycleState.ALLOCATED);
@@ -463,4 +448,51 @@ public boolean isScmInChillMode() {
   public static Logger getLogger() {
     return LOG;
   }
+
+  /**
+   * This class uses system current time milliseconds to generate unique id.
+   */
+  public static final class UniqueId {
+    /*
+     * When we represent time in milliseconds using 'long' data type,
+     * the LSB bits are used. Currently we are only using 44 bits (LSB),
+     * 20 bits (MSB) are not used.
+     * We will exhaust this 44 bits only when we are in year 2525,
+     * until then we can safely use this 20 bits (MSB) for offset to generate
+     * unique id within millisecond.
+     *
+     * Year : Mon Dec 31 18:49:04 IST 2525
+     * TimeInMillis: 17545641544247
+     * Binary Representation:
+     *   MSB (20 bits): 0000 0000 0000 0000 0000
+     *   LSB (44 bits): 1111 1111 0101 0010 1001 1011 1011 0100 1010 0011 0111
+     *
+     * We have 20 bits to run counter, we should exclude the first bit (MSB)
+     * as we don't want to deal with negative values.
+     * To be on safer side we will use 'short' data type which is of length
+     * 16 bits and will give us 65,536 values for offset.
+     *
+     */
+
+    private static volatile short offset = 0;
+
+    /**
+     * Private constructor so that no one can instantiate this class.
+     */
+    private UniqueId() {}
+
+    /**
+     * Calculate and returns next unique id based on System#currentTimeMillis.
+     *
+     * @return unique long value
+     */
+    public static synchronized long next() {
+      long utcTime = Time.getUtcTime();
+      if ((utcTime & 0xFFFF000000000000L) == 0) {
+        return utcTime << Short.SIZE | (offset++ & 0x0000FFFF);
+      }
+      throw new RuntimeException("Got invalid UTC time," +
+          " cannot generate unique Id. UTC Time: " + utcTime);
+    }
+  }
 }
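For reference, next() packs the millisecond timestamp (currently 44 significant bits) into the upper bits of the long and the 16-bit offset into the lower bits, i.e. id = (utcMillis << 16) | counter, so up to 65,536 distinct ids can be issued within a single millisecond. A small driver showing the intended behaviour; UniqueIdDemo is hypothetical and mimics the class above, with System.currentTimeMillis() standing in for Time.getUtcTime():

import java.util.HashSet;
import java.util.Set;

// Hypothetical driver mimicking UniqueId: id = (utcMillis << 16) | counter.
public class UniqueIdDemo {
  private static short offset = 0;

  public static synchronized long next() {
    long utcMillis = System.currentTimeMillis();
    if ((utcMillis & 0xFFFF000000000000L) == 0) {
      return utcMillis << Short.SIZE | (offset++ & 0x0000FFFF);
    }
    throw new RuntimeException("UTC time too large: " + utcMillis);
  }

  public static void main(String[] args) {
    Set<Long> seen = new HashSet<>();
    for (int i = 0; i < 10_000; i++) {
      // Uniqueness holds as long as no single millisecond issues more than
      // 65,536 ids, i.e. before the 16-bit counter can wrap within one tick.
      if (!seen.add(next())) {
        throw new AssertionError("duplicate id");
      }
    }
    // The originating millisecond is recoverable by shifting the counter out.
    System.out.println("timestamp(ms) = " + (next() >>> Short.SIZE));
  }
}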