HDFS-14329. RBF: Add maintenance nodes to federation metrics. Contributed by Ayush Saxena.

This commit is contained in:
Inigo Goiri 2019-03-03 10:51:58 -08:00 committed by Brahma Reddy Battula
parent 7bbe01a196
commit 1ce25e702b
11 changed files with 188 additions and 2 deletions

View File

@ -130,6 +130,24 @@ public interface FederationMBean {
*/
int getNumDecomDeadNodes();
/**
 * Get the number of datanodes that are in maintenance and still live.
 *
 * @return Number of live datanodes which are under maintenance.
 */
int getNumInMaintenanceLiveDataNodes();
/**
 * Get the number of datanodes that are in maintenance and dead.
 *
 * @return Number of dead datanodes which are under maintenance.
 */
int getNumInMaintenanceDeadDataNodes();
/**
 * Get the number of datanodes that are entering maintenance.
 *
 * @return Number of datanodes which are entering maintenance.
 */
int getNumEnteringMaintenanceDataNodes();
/**
* Get Max, Median, Min and Standard Deviation of DataNodes usage.
* @return the DataNode usage information, as a JSON string.

View File

@ -437,6 +437,24 @@ public int getNumDecomDeadNodes() {
MembershipStats::getNumOfDecomDeadDatanodes);
}
/**
 * Get the number of live datanodes in maintenance.
 * Delegates to getNameserviceAggregatedInt, passing the MembershipStats
 * getter for the live in maintenance count.
 *
 * @return The value produced by getNameserviceAggregatedInt.
 */
@Override
public int getNumInMaintenanceLiveDataNodes() {
return getNameserviceAggregatedInt(
MembershipStats::getNumOfInMaintenanceLiveDataNodes);
}
/**
 * Get the number of dead datanodes in maintenance.
 * Delegates to getNameserviceAggregatedInt, passing the MembershipStats
 * getter for the dead in maintenance count.
 *
 * @return The value produced by getNameserviceAggregatedInt.
 */
@Override
public int getNumInMaintenanceDeadDataNodes() {
return getNameserviceAggregatedInt(
MembershipStats::getNumOfInMaintenanceDeadDataNodes);
}
/**
 * Get the number of datanodes entering maintenance.
 * Delegates to getNameserviceAggregatedInt, passing the MembershipStats
 * getter for the entering maintenance count.
 *
 * @return The value produced by getNameserviceAggregatedInt.
 */
@Override
public int getNumEnteringMaintenanceDataNodes() {
return getNameserviceAggregatedInt(
MembershipStats::getNumOfEnteringMaintenanceDataNodes);
}
@Override // NameNodeMXBean
public String getNodeUsage() {
float median = 0;

View File

@ -697,16 +697,34 @@ public int getNumDecommissioningDataNodes() {
/**
 * Get the number of live datanodes in maintenance from the federation
 * metrics.
 *
 * @return The count reported by the federation metrics, or 0 if an
 *         IOException is raised while reading it.
 */
@Override
public int getNumInMaintenanceLiveDataNodes() {
  try {
    return getFederationMetrics().getNumInMaintenanceLiveDataNodes();
  } catch (IOException e) {
    // Best effort: fall through to 0. The exception is passed as the
    // trailing argument so the failure cause is actually recorded; a
    // plain String argument without a placeholder would be discarded.
    LOG.debug("Failed to get number of live in maintenance nodes", e);
  }
  return 0;
}
/**
 * Get the number of dead datanodes in maintenance from the federation
 * metrics.
 *
 * @return The count reported by the federation metrics, or 0 if an
 *         IOException is raised while reading it.
 */
@Override
public int getNumInMaintenanceDeadDataNodes() {
  try {
    return getFederationMetrics().getNumInMaintenanceDeadDataNodes();
  } catch (IOException e) {
    // Best effort: fall through to 0. The exception is passed as the
    // trailing argument so the failure cause is actually recorded; a
    // plain String argument without a placeholder would be discarded.
    LOG.debug("Failed to get number of dead in maintenance nodes", e);
  }
  return 0;
}
/**
 * Get the number of datanodes entering maintenance from the federation
 * metrics.
 *
 * @return The count reported by the federation metrics, or 0 if an
 *         IOException is raised while reading it.
 */
@Override
public int getNumEnteringMaintenanceDataNodes() {
  try {
    return getFederationMetrics().getNumEnteringMaintenanceDataNodes();
  } catch (IOException e) {
    // Best effort: fall through to 0. The exception is passed as the
    // trailing argument so the failure cause is actually recorded; a
    // plain String argument without a placeholder would be discarded.
    LOG.debug("Failed to get number of entering maintenance nodes", e);
  }
  return 0;
}

View File

@ -283,6 +283,12 @@ public boolean registerNamenode(NamenodeStatusReport report)
stats.setNumOfStaleDatanodes(report.getNumStaleDatanodes());
stats.setNumOfDecomActiveDatanodes(report.getNumDecomLiveDatanodes());
stats.setNumOfDecomDeadDatanodes(report.getNumDecomDeadDatanodes());
stats.setNumOfInMaintenanceLiveDataNodes(
report.getNumInMaintenanceLiveDataNodes());
stats.setNumOfInMaintenanceDeadDataNodes(
report.getNumInMaintenanceDeadDataNodes());
stats.setNumOfEnteringMaintenanceDataNodes(
report.getNumEnteringMaintenanceDataNodes());
record.setStats(stats);
}

View File

@ -50,6 +50,15 @@ public class NamenodeStatusReport {
/** Dead decommissioned datanodes. */
private int deadDecomDatanodes = -1;
/** Number of live datanodes in maintenance; initialized to -1. */
private int inMaintenanceLiveDataNodes = -1;
/** Number of dead datanodes in maintenance; initialized to -1. */
private int inMaintenanceDeadDataNodes = -1;
/** Number of datanodes entering maintenance; initialized to -1. */
private int enteringMaintenanceDataNodes = -1;
/** Space stats. */
private long availableSpace = -1;
private long numOfFiles = -1;
@ -228,15 +237,23 @@ public boolean getSafemode() {
* @param numDecom Number of decommissioning nodes.
* @param numLiveDecom Number of decommissioned live nodes.
* @param numDeadDecom Number of decommissioned dead nodes.
* @param numInMaintenanceLive Number of in maintenance live nodes.
* @param numInMaintenanceDead Number of in maintenance dead nodes.
* @param numEnteringMaintenance Number of entering maintenance nodes.
*/
public void setDatanodeInfo(int numLive, int numDead, int numStale,
int numDecom, int numLiveDecom, int numDeadDecom) {
int numDecom, int numLiveDecom, int numDeadDecom,
int numInMaintenanceLive, int numInMaintenanceDead,
int numEnteringMaintenance) {
this.liveDatanodes = numLive;
this.deadDatanodes = numDead;
this.staleDatanodes = numStale;
this.decomDatanodes = numDecom;
this.liveDecomDatanodes = numLiveDecom;
this.deadDecomDatanodes = numDeadDecom;
this.inMaintenanceLiveDataNodes = numInMaintenanceLive;
this.inMaintenanceDeadDataNodes = numInMaintenanceDead;
this.enteringMaintenanceDataNodes = numEnteringMaintenance;
this.statsValid = true;
}
@ -294,6 +311,33 @@ public int getNumDecomDeadDatanodes() {
return this.deadDecomDatanodes;
}
/**
 * Report how many live datanodes are in maintenance.
 *
 * @return The stored count of live in maintenance nodes.
 */
public int getNumInMaintenanceLiveDataNodes() {
  return inMaintenanceLiveDataNodes;
}
/**
 * Report how many dead datanodes are in maintenance.
 *
 * @return The stored count of dead in maintenance nodes.
 */
public int getNumInMaintenanceDeadDataNodes() {
  return inMaintenanceDeadDataNodes;
}
/**
 * Report how many datanodes are entering maintenance.
 *
 * @return The stored count of entering maintenance nodes.
 */
public int getNumEnteringMaintenanceDataNodes() {
  return enteringMaintenanceDataNodes;
}
/**
* Set the filesystem information.
*

View File

@ -341,7 +341,10 @@ private void updateJMXParameters(
jsonObject.getInt("NumStaleDataNodes"),
jsonObject.getInt("NumDecommissioningDataNodes"),
jsonObject.getInt("NumDecomLiveDataNodes"),
jsonObject.getInt("NumDecomDeadDataNodes"));
jsonObject.getInt("NumDecomDeadDataNodes"),
jsonObject.getInt("NumInMaintenanceLiveDataNodes"),
jsonObject.getInt("NumInMaintenanceDeadDataNodes"),
jsonObject.getInt("NumEnteringMaintenanceDataNodes"));
} else if (name.equals(
"Hadoop:service=NameNode,name=FSNamesystem")) {
report.setNamesystemInfo(

View File

@ -97,6 +97,18 @@ public static MembershipStats newInstance() throws IOException {
public abstract int getNumOfDecomDeadDatanodes();
/**
 * Set the number of live datanodes in maintenance.
 *
 * @param nodes Number of live in maintenance datanodes.
 */
public abstract void setNumOfInMaintenanceLiveDataNodes(int nodes);
/**
 * Get the number of live datanodes in maintenance.
 *
 * @return Number of live in maintenance datanodes.
 */
public abstract int getNumOfInMaintenanceLiveDataNodes();
/**
 * Set the number of dead datanodes in maintenance.
 *
 * @param nodes Number of dead in maintenance datanodes.
 */
public abstract void setNumOfInMaintenanceDeadDataNodes(int nodes);
/**
 * Get the number of dead datanodes in maintenance.
 *
 * @return Number of dead in maintenance datanodes.
 */
public abstract int getNumOfInMaintenanceDeadDataNodes();
/**
 * Set the number of datanodes entering maintenance.
 *
 * @param nodes Number of entering maintenance datanodes.
 */
public abstract void setNumOfEnteringMaintenanceDataNodes(int nodes);
/**
 * Get the number of datanodes entering maintenance.
 *
 * @return Number of entering maintenance datanodes.
 */
public abstract int getNumOfEnteringMaintenanceDataNodes();
@Override
public SortedMap<String, String> getPrimaryKeys() {
// This record is not stored directly, no key needed

View File

@ -208,4 +208,37 @@ public void setNumOfDecomDeadDatanodes(int nodes) {
public int getNumOfDecomDeadDatanodes() {
return this.translator.getProtoOrBuilder().getNumOfDecomDeadDatanodes();
}
/**
 * Write the live in maintenance datanode count into the record builder.
 *
 * @param nodes Value handed to the builder.
 */
@Override
public void setNumOfInMaintenanceLiveDataNodes(int nodes) {
  translator.getBuilder().setNumOfInMaintenanceLiveDataNodes(nodes);
}
/**
 * Read the live in maintenance datanode count from the proto or builder.
 *
 * @return Value returned by the proto or builder.
 */
@Override
public int getNumOfInMaintenanceLiveDataNodes() {
  return translator.getProtoOrBuilder().getNumOfInMaintenanceLiveDataNodes();
}
/**
 * Write the dead in maintenance datanode count into the record builder.
 *
 * @param nodes Value handed to the builder.
 */
@Override
public void setNumOfInMaintenanceDeadDataNodes(int nodes) {
  translator.getBuilder().setNumOfInMaintenanceDeadDataNodes(nodes);
}
/**
 * Read the dead in maintenance datanode count from the proto or builder.
 *
 * @return Value returned by the proto or builder.
 */
@Override
public int getNumOfInMaintenanceDeadDataNodes() {
  return translator.getProtoOrBuilder().getNumOfInMaintenanceDeadDataNodes();
}
/**
 * Write the entering maintenance datanode count into the record builder.
 *
 * @param nodes Value handed to the builder.
 */
@Override
public void setNumOfEnteringMaintenanceDataNodes(int nodes) {
  translator.getBuilder().setNumOfEnteringMaintenanceDataNodes(nodes);
}
/**
 * Read the entering maintenance datanode count from the proto or builder.
 *
 * @return Value returned by the proto or builder.
 */
@Override
public int getNumOfEnteringMaintenanceDataNodes() {
  return translator.getProtoOrBuilder().getNumOfEnteringMaintenanceDataNodes();
}
}

View File

@ -46,6 +46,9 @@ message NamenodeMembershipStatsRecordProto {
optional uint32 numOfDecomActiveDatanodes = 23;
optional uint32 numOfDecomDeadDatanodes = 24;
optional uint32 numOfStaleDatanodes = 25;
// Datanode maintenance counters: live in maintenance, dead in maintenance,
// and entering maintenance.
optional uint32 numOfInMaintenanceLiveDataNodes = 26;
optional uint32 numOfInMaintenanceDeadDataNodes = 27;
optional uint32 numOfEnteringMaintenanceDataNodes = 28;
}
message NamenodeMembershipRecordProto {

View File

@ -145,6 +145,12 @@ public void testNamenodeStatsDataSource() throws IOException, JSONException {
stats.getNumOfDecomActiveDatanodes());
assertEquals(json.getLong("numOfDecomDeadDatanodes"),
stats.getNumOfDecomDeadDatanodes());
assertEquals(json.getLong("numOfInMaintenanceLiveDataNodes"),
stats.getNumOfInMaintenanceLiveDataNodes());
assertEquals(json.getLong("numOfInMaintenanceDeadDataNodes"),
stats.getNumOfInMaintenanceDeadDataNodes());
assertEquals(json.getLong("numOfEnteringMaintenanceDataNodes"),
stats.getNumOfEnteringMaintenanceDataNodes());
assertEquals(json.getLong("numOfBlocks"), stats.getNumOfBlocks());
assertEquals(json.getString("rpcAddress"), mockEntry.getRpcAddress());
assertEquals(json.getString("webAddress"), mockEntry.getWebAddress());
@ -197,6 +203,12 @@ public void testNameserviceStatsDataSource()
json.getLong("numOfDecomActiveDatanodes"));
assertEquals(stats.getNumOfDecomDeadDatanodes(),
json.getLong("numOfDecomDeadDatanodes"));
assertEquals(stats.getNumOfInMaintenanceLiveDataNodes(),
json.getLong("numOfInMaintenanceLiveDataNodes"));
assertEquals(stats.getNumOfInMaintenanceDeadDataNodes(),
json.getLong("numOfInMaintenanceDeadDataNodes"));
assertEquals(stats.getNumOfEnteringMaintenanceDataNodes(),
    json.getLong("numOfEnteringMaintenanceDataNodes"));
assertEquals(stats.getProvidedSpace(),
json.getLong("providedSpace"));
nameservicesFound++;
@ -268,6 +280,9 @@ private void validateClusterStatsBean(FederationMBean bean)
long numDecom = 0;
long numDecomLive = 0;
long numDecomDead = 0;
long numInMaintenanceLive = 0;
long numInMaintenanceDead = 0;
long numEnteringMaintenance = 0;
long numFiles = 0;
for (MembershipState mock : getActiveMemberships()) {
MembershipStats stats = mock.getStats();
@ -278,6 +293,9 @@ private void validateClusterStatsBean(FederationMBean bean)
numDecom += stats.getNumOfDecommissioningDatanodes();
numDecomLive += stats.getNumOfDecomActiveDatanodes();
numDecomDead += stats.getNumOfDecomDeadDatanodes();
numInMaintenanceLive += stats.getNumOfInMaintenanceLiveDataNodes();
numInMaintenanceDead += stats.getNumOfInMaintenanceDeadDataNodes();
numEnteringMaintenance += stats.getNumOfEnteringMaintenanceDataNodes();
}
assertEquals(numBlocks, bean.getNumBlocks());
@ -287,6 +305,10 @@ private void validateClusterStatsBean(FederationMBean bean)
assertEquals(numDecom, bean.getNumDecommissioningNodes());
assertEquals(numDecomLive, bean.getNumDecomLiveNodes());
assertEquals(numDecomDead, bean.getNumDecomDeadNodes());
assertEquals(numInMaintenanceLive, bean.getNumInMaintenanceLiveDataNodes());
assertEquals(numInMaintenanceDead, bean.getNumInMaintenanceDeadDataNodes());
assertEquals(numEnteringMaintenance,
bean.getNumEnteringMaintenanceDataNodes());
assertEquals(numFiles, bean.getNumFiles());
assertEquals(getActiveMemberships().size() + getStandbyMemberships().size(),
bean.getNumNamenodes());

View File

@ -52,6 +52,9 @@ public class TestMembershipState {
private static final int NUM_DECOM = 700;
private static final int NUM_DECOM_ACTIVE = 800;
private static final int NUM_DECOM_DEAD = 900;
// Maintenance datanode counts stored in the test record: live in
// maintenance, dead in maintenance, and entering maintenance.
private static final int NUM_MAIN_LIVE = 151;
private static final int NUM_MAIN_DEAD = 303;
private static final int NUM_ENTER_MAIN = 144;
private static final long NUM_BLOCK_MISSING = 1000;
private static final long TOTAL_SPACE = 1100;
@ -78,6 +81,9 @@ private MembershipState createRecord() throws IOException {
stats.setNumOfDecommissioningDatanodes(NUM_DECOM);
stats.setNumOfDecomActiveDatanodes(NUM_DECOM_ACTIVE);
stats.setNumOfDecomDeadDatanodes(NUM_DECOM_DEAD);
stats.setNumOfInMaintenanceLiveDataNodes(NUM_MAIN_LIVE);
stats.setNumOfInMaintenanceDeadDataNodes(NUM_MAIN_DEAD);
stats.setNumOfEnteringMaintenanceDataNodes(NUM_ENTER_MAIN);
stats.setNumOfBlocksMissing(NUM_BLOCK_MISSING);
stats.setTotalSpace(TOTAL_SPACE);
stats.setAvailableSpace(AVAILABLE_SPACE);
@ -107,6 +113,9 @@ private void validateRecord(MembershipState record) throws IOException {
assertEquals(NUM_DECOM, stats.getNumOfDecommissioningDatanodes());
assertEquals(NUM_DECOM_ACTIVE, stats.getNumOfDecomActiveDatanodes());
assertEquals(NUM_DECOM_DEAD, stats.getNumOfDecomDeadDatanodes());
assertEquals(NUM_MAIN_LIVE, stats.getNumOfInMaintenanceLiveDataNodes());
assertEquals(NUM_MAIN_DEAD, stats.getNumOfInMaintenanceDeadDataNodes());
assertEquals(NUM_ENTER_MAIN, stats.getNumOfEnteringMaintenanceDataNodes());
assertEquals(TOTAL_SPACE, stats.getTotalSpace());
assertEquals(AVAILABLE_SPACE, stats.getAvailableSpace());
}