From 5652131d2ea68c408dd3cd8bee31723642a8cdde Mon Sep 17 00:00:00 2001 From: yliu Date: Wed, 2 Sep 2015 08:58:51 +0800 Subject: [PATCH] HDFS-8995. Flaw in registration bookkeeping can make DN die on reconnect. (Kihwal Lee via yliu) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../apache/hadoop/hdfs/server/datanode/BPOfferService.java | 3 +-- .../apache/hadoop/hdfs/server/datanode/BPServiceActor.java | 7 ++++--- .../org/apache/hadoop/hdfs/server/datanode/DataNode.java | 5 +---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 57ddcb2943..ea398f2cd5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1302,6 +1302,9 @@ Release 2.7.2 - UNRELEASED HDFS-8891. HDFS concat should keep srcs order. (Yong Zhang via jing9) + HDFS-8995. Flaw in registration bookkeeping can make DN die on reconnect. + (Kihwal Lee via yliu) + Release 2.7.1 - 2015-07-06 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index 92323f1530..7aab4f71ae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -351,9 +351,8 @@ void registrationSucceeded(BPServiceActor bpServiceActor, reg.getStorageInfo().getNamespaceID(), "namespace ID"); checkNSEquality(bpRegistration.getStorageInfo().getClusterID(), reg.getStorageInfo().getClusterID(), "cluster ID"); - } else { - bpRegistration = reg; } + bpRegistration = reg; dn.bpRegistrationSucceeded(bpRegistration, getBlockPoolId()); // Add the initial block token secret keys to the DN's secret manager. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index 1817427822..85ea6ae77a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -767,15 +767,16 @@ private void offerService() throws Exception { void register(NamespaceInfo nsInfo) throws IOException { // The handshake() phase loaded the block pool storage // off disk - so update the bpRegistration object from that info - bpRegistration = bpos.createRegistration(); + DatanodeRegistration newBpRegistration = bpos.createRegistration(); LOG.info(this + " beginning handshake with NN"); while (shouldRun()) { try { // Use returned registration from namenode with updated fields - bpRegistration = bpNamenode.registerDatanode(bpRegistration); - bpRegistration.setNamespaceInfo(nsInfo); + newBpRegistration = bpNamenode.registerDatanode(newBpRegistration); + newBpRegistration.setNamespaceInfo(nsInfo); + bpRegistration = newBpRegistration; break; } catch(EOFException e) { // namenode might have just restarted LOG.info("Problem connecting to server: " + nnAddr + " :" diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 5bc50b0493..42cbd96f93 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -1261,10 +1261,7 @@ DatanodeRegistration createBPRegistration(NamespaceInfo nsInfo) { */ synchronized void bpRegistrationSucceeded(DatanodeRegistration 
bpRegistration, String blockPoolId) throws IOException { - // Set the ID if we haven't already - if (null == id) { - id = bpRegistration; - } + id = bpRegistration; if(!storage.getDatanodeUuid().equals(bpRegistration.getDatanodeUuid())) { throw new IOException("Inconsistent Datanode IDs. Name-node returned "