From 10b42e9306368ab7506200615e7ea4fd5278f8cb Mon Sep 17 00:00:00 2001 From: Tao Li Date: Fri, 3 Feb 2023 10:02:45 +0800 Subject: [PATCH] HDFS-16902. Add Namenode status to BPServiceActor metrics and improve logging in offerservice (#5334) Reviewed-by: Mingliang Liu Reviewed-by: Shilun Fan Signed-off-by: Tao Li --- .../hdfs/server/datanode/BPServiceActor.java | 15 ++++++++++++++- .../hdfs/server/datanode/ErrorReportAction.java | 9 +++++++++ .../server/datanode/ReportBadBlockAction.java | 10 ++++++++++ .../src/main/webapps/datanode/datanode.html | 2 ++ 4 files changed, 35 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index 06c2c681c3..362edd1cac 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -200,6 +200,7 @@ private String getNameNodeAddress() { Map getActorInfoMap() { final Map info = new HashMap(); info.put("NamenodeAddress", getNameNodeAddress()); + info.put("NamenodeHaState", state != null ? state.toString() : "Unknown"); info.put("BlockPoolID", bpos.getBlockPoolId()); info.put("ActorState", getRunningState()); info.put("LastHeartbeat", @@ -685,6 +686,8 @@ private void offerService() throws Exception { // Every so often, send heartbeat or block-report // final boolean sendHeartbeat = scheduler.isHeartbeatDue(startTime); + LOG.debug("BP offer service run start time: {}, sendHeartbeat: {}", startTime, + sendHeartbeat); HeartbeatResponse resp = null; if (sendHeartbeat) { // @@ -697,6 +700,8 @@ private void offerService() throws Exception { boolean requestBlockReportLease = (fullBlockReportLeaseId == 0) && scheduler.isBlockReportDue(startTime); if (!dn.areHeartbeatsDisabledForTests()) { + LOG.debug("Before sending heartbeat to namenode {}, the state of the namenode known" + + " to datanode so far is {}", this.getNameNodeAddress(), state); resp = sendHeartBeat(requestBlockReportLease); assert resp != null; if (resp.getFullBlockReportLeaseId() != 0) { @@ -721,7 +726,12 @@ private void offerService() throws Exception { // that we should actually process. bpos.updateActorStatesFromHeartbeat( this, resp.getNameNodeHaState()); - state = resp.getNameNodeHaState().getState(); + HAServiceState stateFromResp = resp.getNameNodeHaState().getState(); + if (state != stateFromResp) { + LOG.info("After receiving heartbeat response, updating state of namenode {} to {}", + this.getNameNodeAddress(), stateFromResp); + } + state = stateFromResp; if (state == HAServiceState.ACTIVE) { handleRollingUpgradeStatus(resp); @@ -781,6 +791,7 @@ private void sleepAfterException() { long sleepTime = Math.min(1000, dnConf.heartBeatInterval); Thread.sleep(sleepTime); } catch (InterruptedException ie) { + LOG.info("BPServiceActor {} is interrupted", this); Thread.currentThread().interrupt(); } } @@ -983,6 +994,8 @@ private void processQueueMessages() { while (!duplicateQueue.isEmpty()) { BPServiceActorAction actionItem = duplicateQueue.remove(); try { + LOG.debug("BPServiceActor ( {} ) processing queued messages. Action item: {}", this, + actionItem); actionItem.reportTo(bpNamenode, bpRegistration); } catch (BPServiceActorActionException baae) { LOG.warn(baae.getMessage() + nnAddr , baae); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java index 26498d4a0f..38581bee22 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ErrorReportAction.java @@ -20,6 +20,7 @@ import java.io.IOException; +import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.ipc.RemoteException; @@ -84,4 +85,12 @@ public boolean equals(Object obj) { } return true; } + + @Override + public String toString() { + return new ToStringBuilder(this) + .append("errorCode", errorCode) + .append("errorMessage", errorMessage) + .toString(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java index 2946358a5c..d525d9f782 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReportBadBlockAction.java @@ -20,6 +20,7 @@ import java.io.IOException; +import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder; @@ -111,4 +112,13 @@ public boolean equals(Object obj) { } return true; } + + @Override + public String toString() { + return new ToStringBuilder(this) + .append("block", block) + .append("storageUuid", storageUuid) + .append("storageType", storageType) + .toString(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/datanode.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/datanode.html index caab81ef68..b491d5a04e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/datanode.html +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/datanode/datanode.html @@ -81,6 +81,7 @@ Namenode Address + Namenode HA State Block Pool ID Actor State Last Heartbeat @@ -91,6 +92,7 @@ {#dn.BPServiceActorInfo} {NamenodeAddress} + {NamenodeHaState} {BlockPoolID} {ActorState} {LastHeartbeat}s