HDFS-2229. Fix a deadlock in namenode by enforcing lock acquisition ordering.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1156847 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tsz-wo Sze 2011-08-11 22:16:16 +00:00
parent 7528853197
commit 1dd48b1aee
3 changed files with 42 additions and 36 deletions

View File

@@ -954,6 +954,9 @@ Trunk (unreleased changes)
HDFS-2245. Fix a NullPointerException in BlockManager.chooseTarget(..). HDFS-2245. Fix a NullPointerException in BlockManager.chooseTarget(..).
(szetszwo) (szetszwo)
HDFS-2229. Fix a deadlock in namenode by enforcing lock acquisition
ordering. (szetszwo)
BREAKDOWN OF HDFS-1073 SUBTASKS BREAKDOWN OF HDFS-1073 SUBTASKS
HDFS-1521. Persist transaction ID on disk between NN restarts. HDFS-1521. Persist transaction ID on disk between NN restarts.

View File

@@ -1829,39 +1829,37 @@ private void invalidateCorruptReplicas(Block blk) {
* over or under replicated. Place it into the respective queue. * over or under replicated. Place it into the respective queue.
*/ */
public void processMisReplicatedBlocks() { public void processMisReplicatedBlocks() {
long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0; assert namesystem.hasWriteLock();
namesystem.writeLock();
try {
neededReplications.clear();
for (BlockInfo block : blocksMap.getBlocks()) {
INodeFile fileINode = block.getINode();
if (fileINode == null) {
// block does not belong to any file
nrInvalid++;
addToInvalidates(block);
continue;
}
// calculate current replication
short expectedReplication = fileINode.getReplication();
NumberReplicas num = countNodes(block);
int numCurrentReplica = num.liveReplicas();
// add to under-replicated queue if need to be
if (isNeededReplication(block, expectedReplication, numCurrentReplica)) {
if (neededReplications.add(block, numCurrentReplica, num
.decommissionedReplicas(), expectedReplication)) {
nrUnderReplicated++;
}
}
if (numCurrentReplica > expectedReplication) { long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0;
// over-replicated block neededReplications.clear();
nrOverReplicated++; for (BlockInfo block : blocksMap.getBlocks()) {
processOverReplicatedBlock(block, expectedReplication, null, null); INodeFile fileINode = block.getINode();
if (fileINode == null) {
// block does not belong to any file
nrInvalid++;
addToInvalidates(block);
continue;
}
// calculate current replication
short expectedReplication = fileINode.getReplication();
NumberReplicas num = countNodes(block);
int numCurrentReplica = num.liveReplicas();
// add to under-replicated queue if need to be
if (isNeededReplication(block, expectedReplication, numCurrentReplica)) {
if (neededReplications.add(block, numCurrentReplica, num
.decommissionedReplicas(), expectedReplication)) {
nrUnderReplicated++;
} }
} }
} finally {
namesystem.writeUnlock(); if (numCurrentReplica > expectedReplication) {
// over-replicated block
nrOverReplicated++;
processOverReplicatedBlock(block, expectedReplication, null, null);
}
} }
LOG.info("Total number of blocks = " + blocksMap.size()); LOG.info("Total number of blocks = " + blocksMap.size());
LOG.info("Number of invalid blocks = " + nrInvalid); LOG.info("Number of invalid blocks = " + nrInvalid);
LOG.info("Number of under-replicated blocks = " + nrUnderReplicated); LOG.info("Number of under-replicated blocks = " + nrUnderReplicated);

View File

@@ -313,13 +313,18 @@ void activateSecretManager() throws IOException {
* Activate FSNamesystem daemons. * Activate FSNamesystem daemons.
*/ */
void activate(Configuration conf) throws IOException { void activate(Configuration conf) throws IOException {
setBlockTotal(); writeLock();
blockManager.activate(conf); try {
this.lmthread = new Daemon(leaseManager.new Monitor()); setBlockTotal();
lmthread.start(); blockManager.activate(conf);
this.nnrmthread = new Daemon(new NameNodeResourceMonitor()); this.lmthread = new Daemon(leaseManager.new Monitor());
nnrmthread.start(); lmthread.start();
this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
nnrmthread.start();
} finally {
writeUnlock();
}
registerMXBean(); registerMXBean();
DefaultMetricsSystem.instance().register(this); DefaultMetricsSystem.instance().register(this);