HDFS-16902 Add Namenode status to BPServiceActor metrics and improve logging in offerService (#5334)

Reviewed-by: Mingliang Liu <liuml07@apache.org>
Reviewed-by: Shilun Fan <slfan1989@apache.org>
Signed-off-by: Tao Li <tomscut@apache.org>
This commit is contained in:
Viraj Jasani 2023-02-02 17:11:02 -08:00 committed by GitHub
parent 22f6d55b71
commit bce388fd3f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 35 additions and 1 deletions

View File

@ -202,6 +202,7 @@ private String getNameNodeAddress() {
Map<String, String> getActorInfoMap() { Map<String, String> getActorInfoMap() {
final Map<String, String> info = new HashMap<String, String>(); final Map<String, String> info = new HashMap<String, String>();
info.put("NamenodeAddress", getNameNodeAddress()); info.put("NamenodeAddress", getNameNodeAddress());
info.put("NamenodeHaState", state != null ? state.toString() : "Unknown");
info.put("BlockPoolID", bpos.getBlockPoolId()); info.put("BlockPoolID", bpos.getBlockPoolId());
info.put("ActorState", getRunningState()); info.put("ActorState", getRunningState());
info.put("LastHeartbeat", info.put("LastHeartbeat",
@ -697,6 +698,8 @@ private void offerService() throws Exception {
// Every so often, send heartbeat or block-report // Every so often, send heartbeat or block-report
// //
final boolean sendHeartbeat = scheduler.isHeartbeatDue(startTime); final boolean sendHeartbeat = scheduler.isHeartbeatDue(startTime);
LOG.debug("BP offer service run start time: {}, sendHeartbeat: {}", startTime,
sendHeartbeat);
HeartbeatResponse resp = null; HeartbeatResponse resp = null;
if (sendHeartbeat) { if (sendHeartbeat) {
// //
@ -709,6 +712,8 @@ private void offerService() throws Exception {
boolean requestBlockReportLease = (fullBlockReportLeaseId == 0) && boolean requestBlockReportLease = (fullBlockReportLeaseId == 0) &&
scheduler.isBlockReportDue(startTime); scheduler.isBlockReportDue(startTime);
if (!dn.areHeartbeatsDisabledForTests()) { if (!dn.areHeartbeatsDisabledForTests()) {
LOG.debug("Before sending heartbeat to namenode {}, the state of the namenode known"
+ " to datanode so far is {}", this.getNameNodeAddress(), state);
resp = sendHeartBeat(requestBlockReportLease); resp = sendHeartBeat(requestBlockReportLease);
assert resp != null; assert resp != null;
if (resp.getFullBlockReportLeaseId() != 0) { if (resp.getFullBlockReportLeaseId() != 0) {
@ -733,7 +738,12 @@ private void offerService() throws Exception {
// that we should actually process. // that we should actually process.
bpos.updateActorStatesFromHeartbeat( bpos.updateActorStatesFromHeartbeat(
this, resp.getNameNodeHaState()); this, resp.getNameNodeHaState());
state = resp.getNameNodeHaState().getState(); HAServiceState stateFromResp = resp.getNameNodeHaState().getState();
if (state != stateFromResp) {
LOG.info("After receiving heartbeat response, updating state of namenode {} to {}",
this.getNameNodeAddress(), stateFromResp);
}
state = stateFromResp;
if (state == HAServiceState.ACTIVE) { if (state == HAServiceState.ACTIVE) {
handleRollingUpgradeStatus(resp); handleRollingUpgradeStatus(resp);
@ -794,6 +804,7 @@ private void sleepAfterException() {
long sleepTime = Math.min(1000, dnConf.heartBeatInterval); long sleepTime = Math.min(1000, dnConf.heartBeatInterval);
Thread.sleep(sleepTime); Thread.sleep(sleepTime);
} catch (InterruptedException ie) { } catch (InterruptedException ie) {
LOG.info("BPServiceActor {} is interrupted", this);
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
} }
} }
@ -995,6 +1006,8 @@ private void processQueueMessages() {
while (!duplicateQueue.isEmpty()) { while (!duplicateQueue.isEmpty()) {
BPServiceActorAction actionItem = duplicateQueue.remove(); BPServiceActorAction actionItem = duplicateQueue.remove();
try { try {
LOG.debug("BPServiceActor ( {} ) processing queued messages. Action item: {}", this,
actionItem);
actionItem.reportTo(bpNamenode, bpRegistration); actionItem.reportTo(bpNamenode, bpRegistration);
} catch (BPServiceActorActionException baae) { } catch (BPServiceActorActionException baae) {
LOG.warn(baae.getMessage() + nnAddr , baae); LOG.warn(baae.getMessage() + nnAddr , baae);

View File

@ -20,6 +20,7 @@
import java.io.IOException; import java.io.IOException;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.RemoteException;
@ -84,4 +85,12 @@ public boolean equals(Object obj) {
} }
return true; return true;
} }
/**
 * Builds a textual description of this action from its error code and
 * error message using {@link ToStringBuilder}.
 *
 * @return the string form of this action, suitable for log output
 */
@Override
public String toString() {
  final ToStringBuilder description = new ToStringBuilder(this);
  description.append("errorCode", errorCode);
  description.append("errorMessage", errorMessage);
  return description.toString();
}
} }

View File

@ -20,6 +20,7 @@
import java.io.IOException; import java.io.IOException;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder; import org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder;
@ -111,4 +112,13 @@ public boolean equals(Object obj) {
} }
return true; return true;
} }
/**
 * Builds a textual description of this action from its block, storage UUID
 * and storage type using {@link ToStringBuilder}.
 *
 * @return the string form of this action, suitable for log output
 */
@Override
public String toString() {
  final ToStringBuilder description = new ToStringBuilder(this);
  description.append("block", block);
  description.append("storageUuid", storageUuid);
  description.append("storageType", storageType);
  return description.toString();
}
} }

View File

@ -81,6 +81,7 @@
<thead> <thead>
<tr> <tr>
<th>Namenode Address</th> <th>Namenode Address</th>
<th>Namenode HA State</th>
<th>Block Pool ID</th> <th>Block Pool ID</th>
<th>Actor State</th> <th>Actor State</th>
<th>Last Heartbeat</th> <th>Last Heartbeat</th>
@ -91,6 +92,7 @@
{#dn.BPServiceActorInfo} {#dn.BPServiceActorInfo}
<tr> <tr>
<td>{NamenodeAddress}</td> <td>{NamenodeAddress}</td>
<td>{NamenodeHaState}</td>
<td>{BlockPoolID}</td> <td>{BlockPoolID}</td>
<td>{ActorState}</td> <td>{ActorState}</td>
<td>{LastHeartbeat}s</td> <td>{LastHeartbeat}s</td>