HDFS-14329. RBF: Add maintenance nodes to federation metrics. Contributed by Ayush Saxena.

This commit is contained in:
Inigo Goiri 2019-03-03 10:51:58 -08:00 committed by Brahma Reddy Battula
parent 7bbe01a196
commit 1ce25e702b
11 changed files with 188 additions and 2 deletions

View File

@ -130,6 +130,24 @@ public interface FederationMBean {
*/ */
int getNumDecomDeadNodes(); int getNumDecomDeadNodes();
/**
* Get the number of datanodes that are in maintenance and still live.
* NOTE(review): presumably a total over every nameservice in the
* federation; confirm against the implementing class.
* @return Number of live datanodes which are under maintenance.
*/
int getNumInMaintenanceLiveDataNodes();
/**
* Get the number of datanodes that are in maintenance and dead.
* NOTE(review): presumably a total over every nameservice in the
* federation; confirm against the implementing class.
* @return Number of dead datanodes which are under maintenance.
*/
int getNumInMaintenanceDeadDataNodes();
/**
* Get the number of datanodes that are entering maintenance, i.e. not yet
* counted as in maintenance.
* NOTE(review): presumably a total over every nameservice in the
* federation; confirm against the implementing class.
* @return Number of datanodes which are entering maintenance.
*/
int getNumEnteringMaintenanceDataNodes();
/** /**
* Get Max, Median, Min and Standard Deviation of DataNodes usage. * Get Max, Median, Min and Standard Deviation of DataNodes usage.
* @return the DataNode usage information, as a JSON string. * @return the DataNode usage information, as a JSON string.

View File

@ -437,6 +437,24 @@ public int getNumDecomDeadNodes() {
MembershipStats::getNumOfDecomDeadDatanodes); MembershipStats::getNumOfDecomDeadDatanodes);
} }
@Override
public int getNumInMaintenanceLiveDataNodes() {
  // Delegate to the nameservice aggregation helper, selecting the
  // live in-maintenance counter of MembershipStats.
  final int liveInMaintenance = getNameserviceAggregatedInt(
      MembershipStats::getNumOfInMaintenanceLiveDataNodes);
  return liveInMaintenance;
}
@Override
public int getNumInMaintenanceDeadDataNodes() {
  // Delegate to the nameservice aggregation helper, selecting the
  // dead in-maintenance counter of MembershipStats.
  final int deadInMaintenance = getNameserviceAggregatedInt(
      MembershipStats::getNumOfInMaintenanceDeadDataNodes);
  return deadInMaintenance;
}
@Override
public int getNumEnteringMaintenanceDataNodes() {
  // Delegate to the nameservice aggregation helper, selecting the
  // entering-maintenance counter of MembershipStats.
  final int enteringMaintenance = getNameserviceAggregatedInt(
      MembershipStats::getNumOfEnteringMaintenanceDataNodes);
  return enteringMaintenance;
}
@Override // NameNodeMXBean @Override // NameNodeMXBean
public String getNodeUsage() { public String getNodeUsage() {
float median = 0; float median = 0;

View File

@ -697,16 +697,34 @@ public int getNumDecommissioningDataNodes() {
@Override @Override
public int getNumInMaintenanceLiveDataNodes() { public int getNumInMaintenanceLiveDataNodes() {
try {
  // Ask the federation metrics for the live in-maintenance count.
  return getFederationMetrics().getNumInMaintenanceLiveDataNodes();
} catch (IOException e) {
  // Best effort: the metric falls back to the default below. The "{}"
  // placeholder is required, otherwise the logger silently drops the
  // exception message argument.
  LOG.debug("Failed to get number of live in maintenance nodes: {}",
      e.getMessage());
}
return 0; return 0;
} }
@Override @Override
public int getNumInMaintenanceDeadDataNodes() { public int getNumInMaintenanceDeadDataNodes() {
try {
  // Ask the federation metrics for the dead in-maintenance count.
  return getFederationMetrics().getNumInMaintenanceDeadDataNodes();
} catch (IOException e) {
  // Best effort: the metric falls back to the default below. The "{}"
  // placeholder is required, otherwise the logger silently drops the
  // exception message argument.
  LOG.debug("Failed to get number of dead in maintenance nodes: {}",
      e.getMessage());
}
return 0; return 0;
} }
@Override @Override
public int getNumEnteringMaintenanceDataNodes() { public int getNumEnteringMaintenanceDataNodes() {
try {
  // Ask the federation metrics for the entering-maintenance count.
  return getFederationMetrics().getNumEnteringMaintenanceDataNodes();
} catch (IOException e) {
  // Best effort: the metric falls back to the default below. The "{}"
  // placeholder is required, otherwise the logger silently drops the
  // exception message argument.
  LOG.debug("Failed to get number of entering maintenance nodes: {}",
      e.getMessage());
}
return 0; return 0;
} }

View File

@ -283,6 +283,12 @@ public boolean registerNamenode(NamenodeStatusReport report)
stats.setNumOfStaleDatanodes(report.getNumStaleDatanodes()); stats.setNumOfStaleDatanodes(report.getNumStaleDatanodes());
stats.setNumOfDecomActiveDatanodes(report.getNumDecomLiveDatanodes()); stats.setNumOfDecomActiveDatanodes(report.getNumDecomLiveDatanodes());
stats.setNumOfDecomDeadDatanodes(report.getNumDecomDeadDatanodes()); stats.setNumOfDecomDeadDatanodes(report.getNumDecomDeadDatanodes());
stats.setNumOfInMaintenanceLiveDataNodes(
report.getNumInMaintenanceLiveDataNodes());
stats.setNumOfInMaintenanceDeadDataNodes(
report.getNumInMaintenanceDeadDataNodes());
stats.setNumOfEnteringMaintenanceDataNodes(
report.getNumEnteringMaintenanceDataNodes());
record.setStats(stats); record.setStats(stats);
} }

View File

@ -50,6 +50,15 @@ public class NamenodeStatusReport {
/** Dead decommissioned datanodes. */ /** Dead decommissioned datanodes. */
private int deadDecomDatanodes = -1; private int deadDecomDatanodes = -1;
/** Live in maintenance datanodes. */
private int inMaintenanceLiveDataNodes = -1;
/** Dead in maintenance datanodes. */
private int inMaintenanceDeadDataNodes = -1;
/** Entering maintenance datanodes. */
private int enteringMaintenanceDataNodes = -1;
/** Space stats. */ /** Space stats. */
private long availableSpace = -1; private long availableSpace = -1;
private long numOfFiles = -1; private long numOfFiles = -1;
@ -228,15 +237,23 @@ public boolean getSafemode() {
* @param numDecom Number of decommissioning nodes. * @param numDecom Number of decommissioning nodes.
* @param numLiveDecom Number of decommissioned live nodes. * @param numLiveDecom Number of decommissioned live nodes.
* @param numDeadDecom Number of decommissioned dead nodes. * @param numDeadDecom Number of decommissioned dead nodes.
* @param numInMaintenanceLive Number of in maintenance live nodes.
* @param numInMaintenanceDead Number of in maintenance dead nodes.
* @param numEnteringMaintenance Number of entering maintenance nodes.
*/ */
public void setDatanodeInfo(int numLive, int numDead, int numStale, public void setDatanodeInfo(int numLive, int numDead, int numStale,
int numDecom, int numLiveDecom, int numDeadDecom) { int numDecom, int numLiveDecom, int numDeadDecom,
int numInMaintenanceLive, int numInMaintenanceDead,
int numEnteringMaintenance) {
this.liveDatanodes = numLive; this.liveDatanodes = numLive;
this.deadDatanodes = numDead; this.deadDatanodes = numDead;
this.staleDatanodes = numStale; this.staleDatanodes = numStale;
this.decomDatanodes = numDecom; this.decomDatanodes = numDecom;
this.liveDecomDatanodes = numLiveDecom; this.liveDecomDatanodes = numLiveDecom;
this.deadDecomDatanodes = numDeadDecom; this.deadDecomDatanodes = numDeadDecom;
this.inMaintenanceLiveDataNodes = numInMaintenanceLive;
this.inMaintenanceDeadDataNodes = numInMaintenanceDead;
this.enteringMaintenanceDataNodes = numEnteringMaintenance;
this.statsValid = true; this.statsValid = true;
} }
@ -294,6 +311,33 @@ public int getNumDecomDeadDatanodes() {
return this.deadDecomDatanodes; return this.deadDecomDatanodes;
} }
/**
 * Report how many live datanodes are in maintenance.
 *
 * @return Count of live in-maintenance datanodes as last recorded in this
 *         report.
 */
public int getNumInMaintenanceLiveDataNodes() {
  final int liveInMaintenance = inMaintenanceLiveDataNodes;
  return liveInMaintenance;
}
/**
 * Report how many dead datanodes are in maintenance.
 *
 * @return Count of dead in-maintenance datanodes as last recorded in this
 *         report.
 */
public int getNumInMaintenanceDeadDataNodes() {
  final int deadInMaintenance = inMaintenanceDeadDataNodes;
  return deadInMaintenance;
}
/**
 * Report how many datanodes are entering maintenance.
 *
 * @return Count of entering-maintenance datanodes as last recorded in this
 *         report.
 */
public int getNumEnteringMaintenanceDataNodes() {
  final int enteringMaintenance = enteringMaintenanceDataNodes;
  return enteringMaintenance;
}
/** /**
* Set the filesystem information. * Set the filesystem information.
* *

View File

@ -341,7 +341,10 @@ private void updateJMXParameters(
jsonObject.getInt("NumStaleDataNodes"), jsonObject.getInt("NumStaleDataNodes"),
jsonObject.getInt("NumDecommissioningDataNodes"), jsonObject.getInt("NumDecommissioningDataNodes"),
jsonObject.getInt("NumDecomLiveDataNodes"), jsonObject.getInt("NumDecomLiveDataNodes"),
jsonObject.getInt("NumDecomDeadDataNodes")); jsonObject.getInt("NumDecomDeadDataNodes"),
jsonObject.getInt("NumInMaintenanceLiveDataNodes"),
jsonObject.getInt("NumInMaintenanceDeadDataNodes"),
jsonObject.getInt("NumEnteringMaintenanceDataNodes"));
} else if (name.equals( } else if (name.equals(
"Hadoop:service=NameNode,name=FSNamesystem")) { "Hadoop:service=NameNode,name=FSNamesystem")) {
report.setNamesystemInfo( report.setNamesystemInfo(

View File

@ -97,6 +97,18 @@ public static MembershipStats newInstance() throws IOException {
public abstract int getNumOfDecomDeadDatanodes(); public abstract int getNumOfDecomDeadDatanodes();
/** Set the number of live datanodes that are in maintenance. */
public abstract void setNumOfInMaintenanceLiveDataNodes(int nodes);
/** Get the number of live datanodes that are in maintenance. */
public abstract int getNumOfInMaintenanceLiveDataNodes();
/** Set the number of dead datanodes that are in maintenance. */
public abstract void setNumOfInMaintenanceDeadDataNodes(int nodes);
/** Get the number of dead datanodes that are in maintenance. */
public abstract int getNumOfInMaintenanceDeadDataNodes();
/** Set the number of datanodes that are entering maintenance. */
public abstract void setNumOfEnteringMaintenanceDataNodes(int nodes);
/** Get the number of datanodes that are entering maintenance. */
public abstract int getNumOfEnteringMaintenanceDataNodes();
@Override @Override
public SortedMap<String, String> getPrimaryKeys() { public SortedMap<String, String> getPrimaryKeys() {
// This record is not stored directly, no key needed // This record is not stored directly, no key needed

View File

@ -208,4 +208,37 @@ public void setNumOfDecomDeadDatanodes(int nodes) {
public int getNumOfDecomDeadDatanodes() { public int getNumOfDecomDeadDatanodes() {
return this.translator.getProtoOrBuilder().getNumOfDecomDeadDatanodes(); return this.translator.getProtoOrBuilder().getNumOfDecomDeadDatanodes();
} }
@Override
public void setNumOfInMaintenanceLiveDataNodes(int nodes) {
  // Store the live in-maintenance count through the translator's builder.
  translator
      .getBuilder()
      .setNumOfInMaintenanceLiveDataNodes(nodes);
}
@Override
public int getNumOfInMaintenanceLiveDataNodes() {
  // Read the live in-maintenance count from the translator's
  // proto-or-builder view.
  final int liveInMaintenance =
      translator.getProtoOrBuilder().getNumOfInMaintenanceLiveDataNodes();
  return liveInMaintenance;
}
@Override
public void setNumOfInMaintenanceDeadDataNodes(int nodes) {
  // Store the dead in-maintenance count through the translator's builder.
  translator
      .getBuilder()
      .setNumOfInMaintenanceDeadDataNodes(nodes);
}
@Override
public int getNumOfInMaintenanceDeadDataNodes() {
  // Read the dead in-maintenance count from the translator's
  // proto-or-builder view.
  final int deadInMaintenance =
      translator.getProtoOrBuilder().getNumOfInMaintenanceDeadDataNodes();
  return deadInMaintenance;
}
@Override
public void setNumOfEnteringMaintenanceDataNodes(int nodes) {
  // Store the entering-maintenance count through the translator's builder.
  translator
      .getBuilder()
      .setNumOfEnteringMaintenanceDataNodes(nodes);
}
@Override
public int getNumOfEnteringMaintenanceDataNodes() {
  // Read the entering-maintenance count from the translator's
  // proto-or-builder view.
  final int enteringMaintenance =
      translator.getProtoOrBuilder().getNumOfEnteringMaintenanceDataNodes();
  return enteringMaintenance;
}
} }

View File

@ -46,6 +46,9 @@ message NamenodeMembershipStatsRecordProto {
optional uint32 numOfDecomActiveDatanodes = 23; optional uint32 numOfDecomActiveDatanodes = 23;
optional uint32 numOfDecomDeadDatanodes = 24; optional uint32 numOfDecomDeadDatanodes = 24;
optional uint32 numOfStaleDatanodes = 25; optional uint32 numOfStaleDatanodes = 25;
optional uint32 numOfInMaintenanceLiveDataNodes = 26;
optional uint32 numOfInMaintenanceDeadDataNodes = 27;
optional uint32 numOfEnteringMaintenanceDataNodes = 28;
} }
message NamenodeMembershipRecordProto { message NamenodeMembershipRecordProto {

View File

@ -145,6 +145,12 @@ public void testNamenodeStatsDataSource() throws IOException, JSONException {
stats.getNumOfDecomActiveDatanodes()); stats.getNumOfDecomActiveDatanodes());
assertEquals(json.getLong("numOfDecomDeadDatanodes"), assertEquals(json.getLong("numOfDecomDeadDatanodes"),
stats.getNumOfDecomDeadDatanodes()); stats.getNumOfDecomDeadDatanodes());
assertEquals(json.getLong("numOfInMaintenanceLiveDataNodes"),
stats.getNumOfInMaintenanceLiveDataNodes());
assertEquals(json.getLong("numOfInMaintenanceDeadDataNodes"),
stats.getNumOfInMaintenanceDeadDataNodes());
assertEquals(json.getLong("numOfEnteringMaintenanceDataNodes"),
stats.getNumOfEnteringMaintenanceDataNodes());
assertEquals(json.getLong("numOfBlocks"), stats.getNumOfBlocks()); assertEquals(json.getLong("numOfBlocks"), stats.getNumOfBlocks());
assertEquals(json.getString("rpcAddress"), mockEntry.getRpcAddress()); assertEquals(json.getString("rpcAddress"), mockEntry.getRpcAddress());
assertEquals(json.getString("webAddress"), mockEntry.getWebAddress()); assertEquals(json.getString("webAddress"), mockEntry.getWebAddress());
@ -197,6 +203,12 @@ public void testNameserviceStatsDataSource()
json.getLong("numOfDecomActiveDatanodes")); json.getLong("numOfDecomActiveDatanodes"));
assertEquals(stats.getNumOfDecomDeadDatanodes(), assertEquals(stats.getNumOfDecomDeadDatanodes(),
json.getLong("numOfDecomDeadDatanodes")); json.getLong("numOfDecomDeadDatanodes"));
// Every maintenance counter exposed in the JSON must equal the same
// counter in the membership stats it was built from.
assertEquals(stats.getNumOfInMaintenanceLiveDataNodes(),
    json.getLong("numOfInMaintenanceLiveDataNodes"));
assertEquals(stats.getNumOfInMaintenanceDeadDataNodes(),
    json.getLong("numOfInMaintenanceDeadDataNodes"));
// Compare against the entering-maintenance getter, not the stale-node
// getter, so this assertion actually covers the new field.
assertEquals(stats.getNumOfEnteringMaintenanceDataNodes(),
    json.getLong("numOfEnteringMaintenanceDataNodes"));
assertEquals(stats.getProvidedSpace(), assertEquals(stats.getProvidedSpace(),
json.getLong("providedSpace")); json.getLong("providedSpace"));
nameservicesFound++; nameservicesFound++;
@ -268,6 +280,9 @@ private void validateClusterStatsBean(FederationMBean bean)
long numDecom = 0; long numDecom = 0;
long numDecomLive = 0; long numDecomLive = 0;
long numDecomDead = 0; long numDecomDead = 0;
long numInMaintenanceLive = 0;
long numInMaintenanceDead = 0;
long numEnteringMaintenance = 0;
long numFiles = 0; long numFiles = 0;
for (MembershipState mock : getActiveMemberships()) { for (MembershipState mock : getActiveMemberships()) {
MembershipStats stats = mock.getStats(); MembershipStats stats = mock.getStats();
@ -278,6 +293,9 @@ private void validateClusterStatsBean(FederationMBean bean)
numDecom += stats.getNumOfDecommissioningDatanodes(); numDecom += stats.getNumOfDecommissioningDatanodes();
numDecomLive += stats.getNumOfDecomActiveDatanodes(); numDecomLive += stats.getNumOfDecomActiveDatanodes();
numDecomDead += stats.getNumOfDecomDeadDatanodes(); numDecomDead += stats.getNumOfDecomDeadDatanodes();
// Accumulate each expected maintenance total from its own counter; the
// dead total must come from the dead getter or the later assertion on
// getNumInMaintenanceDeadDataNodes() checks the wrong quantity.
numInMaintenanceLive += stats.getNumOfInMaintenanceLiveDataNodes();
numInMaintenanceDead += stats.getNumOfInMaintenanceDeadDataNodes();
numEnteringMaintenance += stats.getNumOfEnteringMaintenanceDataNodes();
} }
assertEquals(numBlocks, bean.getNumBlocks()); assertEquals(numBlocks, bean.getNumBlocks());
@ -287,6 +305,10 @@ private void validateClusterStatsBean(FederationMBean bean)
assertEquals(numDecom, bean.getNumDecommissioningNodes()); assertEquals(numDecom, bean.getNumDecommissioningNodes());
assertEquals(numDecomLive, bean.getNumDecomLiveNodes()); assertEquals(numDecomLive, bean.getNumDecomLiveNodes());
assertEquals(numDecomDead, bean.getNumDecomDeadNodes()); assertEquals(numDecomDead, bean.getNumDecomDeadNodes());
assertEquals(numInMaintenanceLive, bean.getNumInMaintenanceLiveDataNodes());
assertEquals(numInMaintenanceDead, bean.getNumInMaintenanceDeadDataNodes());
assertEquals(numEnteringMaintenance,
bean.getNumEnteringMaintenanceDataNodes());
assertEquals(numFiles, bean.getNumFiles()); assertEquals(numFiles, bean.getNumFiles());
assertEquals(getActiveMemberships().size() + getStandbyMemberships().size(), assertEquals(getActiveMemberships().size() + getStandbyMemberships().size(),
bean.getNumNamenodes()); bean.getNumNamenodes());

View File

@ -52,6 +52,9 @@ public class TestMembershipState {
private static final int NUM_DECOM = 700; private static final int NUM_DECOM = 700;
private static final int NUM_DECOM_ACTIVE = 800; private static final int NUM_DECOM_ACTIVE = 800;
private static final int NUM_DECOM_DEAD = 900; private static final int NUM_DECOM_DEAD = 900;
private static final int NUM_MAIN_LIVE = 151;
private static final int NUM_MAIN_DEAD = 303;
private static final int NUM_ENTER_MAIN = 144;
private static final long NUM_BLOCK_MISSING = 1000; private static final long NUM_BLOCK_MISSING = 1000;
private static final long TOTAL_SPACE = 1100; private static final long TOTAL_SPACE = 1100;
@ -78,6 +81,9 @@ private MembershipState createRecord() throws IOException {
stats.setNumOfDecommissioningDatanodes(NUM_DECOM); stats.setNumOfDecommissioningDatanodes(NUM_DECOM);
stats.setNumOfDecomActiveDatanodes(NUM_DECOM_ACTIVE); stats.setNumOfDecomActiveDatanodes(NUM_DECOM_ACTIVE);
stats.setNumOfDecomDeadDatanodes(NUM_DECOM_DEAD); stats.setNumOfDecomDeadDatanodes(NUM_DECOM_DEAD);
stats.setNumOfInMaintenanceLiveDataNodes(NUM_MAIN_LIVE);
stats.setNumOfInMaintenanceDeadDataNodes(NUM_MAIN_DEAD);
stats.setNumOfEnteringMaintenanceDataNodes(NUM_ENTER_MAIN);
stats.setNumOfBlocksMissing(NUM_BLOCK_MISSING); stats.setNumOfBlocksMissing(NUM_BLOCK_MISSING);
stats.setTotalSpace(TOTAL_SPACE); stats.setTotalSpace(TOTAL_SPACE);
stats.setAvailableSpace(AVAILABLE_SPACE); stats.setAvailableSpace(AVAILABLE_SPACE);
@ -107,6 +113,9 @@ private void validateRecord(MembershipState record) throws IOException {
assertEquals(NUM_DECOM, stats.getNumOfDecommissioningDatanodes()); assertEquals(NUM_DECOM, stats.getNumOfDecommissioningDatanodes());
assertEquals(NUM_DECOM_ACTIVE, stats.getNumOfDecomActiveDatanodes()); assertEquals(NUM_DECOM_ACTIVE, stats.getNumOfDecomActiveDatanodes());
assertEquals(NUM_DECOM_DEAD, stats.getNumOfDecomDeadDatanodes()); assertEquals(NUM_DECOM_DEAD, stats.getNumOfDecomDeadDatanodes());
assertEquals(NUM_MAIN_LIVE, stats.getNumOfInMaintenanceLiveDataNodes());
assertEquals(NUM_MAIN_DEAD, stats.getNumOfInMaintenanceDeadDataNodes());
assertEquals(NUM_ENTER_MAIN, stats.getNumOfEnteringMaintenanceDataNodes());
assertEquals(TOTAL_SPACE, stats.getTotalSpace()); assertEquals(TOTAL_SPACE, stats.getTotalSpace());
assertEquals(AVAILABLE_SPACE, stats.getAvailableSpace()); assertEquals(AVAILABLE_SPACE, stats.getAvailableSpace());
} }