HDFS-2229. Fix a deadlock in namenode by enforcing lock acquisition ordering.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1156847 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 7528853197
commit 1dd48b1aee
@@ -954,6 +954,9 @@ Trunk (unreleased changes)
|
|||||||
HDFS-2245. Fix a NullPointerException in BlockManager.chooseTarget(..).
|
HDFS-2245. Fix a NullPointerException in BlockManager.chooseTarget(..).
|
||||||
(szetszwo)
|
(szetszwo)
|
||||||
|
|
||||||
|
HDFS-2229. Fix a deadlock in namenode by enforcing lock acquisition
|
||||||
|
ordering. (szetszwo)
|
||||||
|
|
||||||
BREAKDOWN OF HDFS-1073 SUBTASKS
|
BREAKDOWN OF HDFS-1073 SUBTASKS
|
||||||
|
|
||||||
HDFS-1521. Persist transaction ID on disk between NN restarts.
|
HDFS-1521. Persist transaction ID on disk between NN restarts.
|
||||||
|
@@ -1829,39 +1829,37 @@ private void invalidateCorruptReplicas(Block blk) {
|
|||||||
* over or under replicated. Place it into the respective queue.
|
* over or under replicated. Place it into the respective queue.
|
||||||
*/
|
*/
|
||||||
public void processMisReplicatedBlocks() {
|
public void processMisReplicatedBlocks() {
|
||||||
long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0;
|
assert namesystem.hasWriteLock();
|
||||||
namesystem.writeLock();
|
|
||||||
try {
|
|
||||||
neededReplications.clear();
|
|
||||||
for (BlockInfo block : blocksMap.getBlocks()) {
|
|
||||||
INodeFile fileINode = block.getINode();
|
|
||||||
if (fileINode == null) {
|
|
||||||
// block does not belong to any file
|
|
||||||
nrInvalid++;
|
|
||||||
addToInvalidates(block);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// calculate current replication
|
|
||||||
short expectedReplication = fileINode.getReplication();
|
|
||||||
NumberReplicas num = countNodes(block);
|
|
||||||
int numCurrentReplica = num.liveReplicas();
|
|
||||||
// add to under-replicated queue if need to be
|
|
||||||
if (isNeededReplication(block, expectedReplication, numCurrentReplica)) {
|
|
||||||
if (neededReplications.add(block, numCurrentReplica, num
|
|
||||||
.decommissionedReplicas(), expectedReplication)) {
|
|
||||||
nrUnderReplicated++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (numCurrentReplica > expectedReplication) {
|
long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0;
|
||||||
// over-replicated block
|
neededReplications.clear();
|
||||||
nrOverReplicated++;
|
for (BlockInfo block : blocksMap.getBlocks()) {
|
||||||
processOverReplicatedBlock(block, expectedReplication, null, null);
|
INodeFile fileINode = block.getINode();
|
||||||
|
if (fileINode == null) {
|
||||||
|
// block does not belong to any file
|
||||||
|
nrInvalid++;
|
||||||
|
addToInvalidates(block);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// calculate current replication
|
||||||
|
short expectedReplication = fileINode.getReplication();
|
||||||
|
NumberReplicas num = countNodes(block);
|
||||||
|
int numCurrentReplica = num.liveReplicas();
|
||||||
|
// add to under-replicated queue if need to be
|
||||||
|
if (isNeededReplication(block, expectedReplication, numCurrentReplica)) {
|
||||||
|
if (neededReplications.add(block, numCurrentReplica, num
|
||||||
|
.decommissionedReplicas(), expectedReplication)) {
|
||||||
|
nrUnderReplicated++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
|
||||||
namesystem.writeUnlock();
|
if (numCurrentReplica > expectedReplication) {
|
||||||
|
// over-replicated block
|
||||||
|
nrOverReplicated++;
|
||||||
|
processOverReplicatedBlock(block, expectedReplication, null, null);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Total number of blocks = " + blocksMap.size());
|
LOG.info("Total number of blocks = " + blocksMap.size());
|
||||||
LOG.info("Number of invalid blocks = " + nrInvalid);
|
LOG.info("Number of invalid blocks = " + nrInvalid);
|
||||||
LOG.info("Number of under-replicated blocks = " + nrUnderReplicated);
|
LOG.info("Number of under-replicated blocks = " + nrUnderReplicated);
|
||||||
|
@@ -313,13 +313,18 @@ void activateSecretManager() throws IOException {
|
|||||||
* Activate FSNamesystem daemons.
|
* Activate FSNamesystem daemons.
|
||||||
*/
|
*/
|
||||||
void activate(Configuration conf) throws IOException {
|
void activate(Configuration conf) throws IOException {
|
||||||
setBlockTotal();
|
writeLock();
|
||||||
blockManager.activate(conf);
|
try {
|
||||||
this.lmthread = new Daemon(leaseManager.new Monitor());
|
setBlockTotal();
|
||||||
lmthread.start();
|
blockManager.activate(conf);
|
||||||
|
|
||||||
this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
|
this.lmthread = new Daemon(leaseManager.new Monitor());
|
||||||
nnrmthread.start();
|
lmthread.start();
|
||||||
|
this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
|
||||||
|
nnrmthread.start();
|
||||||
|
} finally {
|
||||||
|
writeUnlock();
|
||||||
|
}
|
||||||
|
|
||||||
registerMXBean();
|
registerMXBean();
|
||||||
DefaultMetricsSystem.instance().register(this);
|
DefaultMetricsSystem.instance().register(this);
|
||||||
|
Loading…
Reference in New Issue
Block a user