From 1ce25e702b5086fc81f7fc23fcb87db167f3804d Mon Sep 17 00:00:00 2001 From: Inigo Goiri Date: Sun, 3 Mar 2019 10:51:58 -0800 Subject: [PATCH] HDFS-14329. RBF: Add maintenance nodes to federation metrics. Contributed by Ayush Saxena. --- .../federation/metrics/FederationMBean.java | 18 ++++++++ .../federation/metrics/FederationMetrics.java | 18 ++++++++ .../metrics/NamenodeBeanMetrics.java | 18 ++++++++ .../resolver/MembershipNamenodeResolver.java | 6 +++ .../resolver/NamenodeStatusReport.java | 46 ++++++++++++++++++- .../router/NamenodeHeartbeatService.java | 5 +- .../store/records/MembershipStats.java | 12 +++++ .../impl/pb/MembershipStatsPBImpl.java | 33 +++++++++++++ .../src/main/proto/FederationProtocol.proto | 3 ++ .../metrics/TestFederationMetrics.java | 22 +++++++++ .../store/records/TestMembershipState.java | 9 ++++ 11 files changed, 188 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java index b37f5efb94..8f24fcbbd1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java @@ -130,6 +130,24 @@ public interface FederationMBean { */ int getNumDecomDeadNodes(); + /** + * Get the number of live datanodes which are under maintenance. + * @return Number of live datanodes which are under maintenance. + */ + int getNumInMaintenanceLiveDataNodes(); + + /** + * Get the number of dead datanodes which are under maintenance. + * @return Number of dead datanodes which are under maintenance. + */ + int getNumInMaintenanceDeadDataNodes(); + + /** + * Get the number of datanodes which are entering maintenance. 
+ * @return Number of datanodes which are entering maintenance. + */ + int getNumEnteringMaintenanceDataNodes(); + /** * Get Max, Median, Min and Standard Deviation of DataNodes usage. * @return the DataNode usage information, as a JSON string. diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMetrics.java index c66910cf97..5ab978d7f2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMetrics.java @@ -437,6 +437,24 @@ public int getNumDecomDeadNodes() { MembershipStats::getNumOfDecomDeadDatanodes); } + @Override + public int getNumInMaintenanceLiveDataNodes() { + return getNameserviceAggregatedInt( + MembershipStats::getNumOfInMaintenanceLiveDataNodes); + } + + @Override + public int getNumInMaintenanceDeadDataNodes() { + return getNameserviceAggregatedInt( + MembershipStats::getNumOfInMaintenanceDeadDataNodes); + } + + @Override + public int getNumEnteringMaintenanceDataNodes() { + return getNameserviceAggregatedInt( + MembershipStats::getNumOfEnteringMaintenanceDataNodes); + } + @Override // NameNodeMXBean public String getNodeUsage() { float median = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java index 9687af0236..50ec175fa9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java @@ -697,16 +697,34 @@ public int getNumDecommissioningDataNodes() { @Override public int getNumInMaintenanceLiveDataNodes() { + try { + return getFederationMetrics().getNumInMaintenanceLiveDataNodes(); + } catch (IOException e) { + LOG.debug("Failed to get number of live in maintenance nodes: {}", + e.getMessage()); + } return 0; } @Override public int getNumInMaintenanceDeadDataNodes() { + try { + return getFederationMetrics().getNumInMaintenanceDeadDataNodes(); + } catch (IOException e) { + LOG.debug("Failed to get number of dead in maintenance nodes: {}", + e.getMessage()); + } return 0; } @Override public int getNumEnteringMaintenanceDataNodes() { + try { + return getFederationMetrics().getNumEnteringMaintenanceDataNodes(); + } catch (IOException e) { + LOG.debug("Failed to get number of entering maintenance nodes: {}", + e.getMessage()); + } return 0; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java index 178db1b3dd..2dd53d819c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java @@ -283,6 +283,12 @@ public boolean registerNamenode(NamenodeStatusReport report) stats.setNumOfStaleDatanodes(report.getNumStaleDatanodes()); stats.setNumOfDecomActiveDatanodes(report.getNumDecomLiveDatanodes()); stats.setNumOfDecomDeadDatanodes(report.getNumDecomDeadDatanodes()); + stats.setNumOfInMaintenanceLiveDataNodes( + report.getNumInMaintenanceLiveDataNodes()); + stats.setNumOfInMaintenanceDeadDataNodes( + 
report.getNumInMaintenanceDeadDataNodes()); + stats.setNumOfEnteringMaintenanceDataNodes( + report.getNumEnteringMaintenanceDataNodes()); record.setStats(stats); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/NamenodeStatusReport.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/NamenodeStatusReport.java index 5b603facb0..c82e3eb674 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/NamenodeStatusReport.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/NamenodeStatusReport.java @@ -50,6 +50,15 @@ public class NamenodeStatusReport { /** Dead decommissioned datanodes. */ private int deadDecomDatanodes = -1; + /** Live in maintenance datanodes. */ + private int inMaintenanceLiveDataNodes = -1; + + /** Dead in maintenance datanodes. */ + private int inMaintenanceDeadDataNodes = -1; + + /** Entering maintenance datanodes. */ + private int enteringMaintenanceDataNodes = -1; + /** Space stats. */ private long availableSpace = -1; private long numOfFiles = -1; @@ -228,15 +237,23 @@ public boolean getSafemode() { * @param numDecom Number of decommissioning nodes. * @param numLiveDecom Number of decommissioned live nodes. * @param numDeadDecom Number of decommissioned dead nodes. + * @param numInMaintenanceLive Number of in maintenance live nodes. + * @param numInMaintenanceDead Number of in maintenance dead nodes. + * @param numEnteringMaintenance Number of entering maintenance nodes. 
*/ public void setDatanodeInfo(int numLive, int numDead, int numStale, - int numDecom, int numLiveDecom, int numDeadDecom) { + int numDecom, int numLiveDecom, int numDeadDecom, + int numInMaintenanceLive, int numInMaintenanceDead, + int numEnteringMaintenance) { this.liveDatanodes = numLive; this.deadDatanodes = numDead; this.staleDatanodes = numStale; this.decomDatanodes = numDecom; this.liveDecomDatanodes = numLiveDecom; this.deadDecomDatanodes = numDeadDecom; + this.inMaintenanceLiveDataNodes = numInMaintenanceLive; + this.inMaintenanceDeadDataNodes = numInMaintenanceDead; + this.enteringMaintenanceDataNodes = numEnteringMaintenance; this.statsValid = true; } @@ -294,6 +311,33 @@ public int getNumDecomDeadDatanodes() { return this.deadDecomDatanodes; } + /** + * Get the number of live in maintenance nodes. + * + * @return The number of live in maintenance nodes. + */ + public int getNumInMaintenanceLiveDataNodes() { + return this.inMaintenanceLiveDataNodes; + } + + /** + * Get the number of dead in maintenance nodes. + * + * @return The number of dead in maintenance nodes. + */ + public int getNumInMaintenanceDeadDataNodes() { + return this.inMaintenanceDeadDataNodes; + } + + /** + * Get the number of entering maintenance nodes. + * + * @return The number of entering maintenance nodes. + */ + public int getNumEnteringMaintenanceDataNodes() { + return this.enteringMaintenanceDataNodes; + } + /** * Set the filesystem information. 
* diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java index 475e90d798..82b5609a86 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java @@ -341,7 +341,10 @@ private void updateJMXParameters( jsonObject.getInt("NumStaleDataNodes"), jsonObject.getInt("NumDecommissioningDataNodes"), jsonObject.getInt("NumDecomLiveDataNodes"), - jsonObject.getInt("NumDecomDeadDataNodes")); + jsonObject.getInt("NumDecomDeadDataNodes"), + jsonObject.getInt("NumInMaintenanceLiveDataNodes"), + jsonObject.getInt("NumInMaintenanceDeadDataNodes"), + jsonObject.getInt("NumEnteringMaintenanceDataNodes")); } else if (name.equals( "Hadoop:service=NameNode,name=FSNamesystem")) { report.setNamesystemInfo( diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipStats.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipStats.java index d452cd2c40..95c790cc95 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipStats.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipStats.java @@ -97,6 +97,18 @@ public static MembershipStats newInstance() throws IOException { public abstract int getNumOfDecomDeadDatanodes(); + public abstract void setNumOfInMaintenanceLiveDataNodes(int nodes); + + public abstract int getNumOfInMaintenanceLiveDataNodes(); + + public abstract void 
setNumOfInMaintenanceDeadDataNodes(int nodes); + + public abstract int getNumOfInMaintenanceDeadDataNodes(); + + public abstract void setNumOfEnteringMaintenanceDataNodes(int nodes); + + public abstract int getNumOfEnteringMaintenanceDataNodes(); + @Override public SortedMap getPrimaryKeys() { // This record is not stored directly, no key needed diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MembershipStatsPBImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MembershipStatsPBImpl.java index 50ecbf3d48..9a8a2bbb66 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MembershipStatsPBImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MembershipStatsPBImpl.java @@ -208,4 +208,37 @@ public void setNumOfDecomDeadDatanodes(int nodes) { public int getNumOfDecomDeadDatanodes() { return this.translator.getProtoOrBuilder().getNumOfDecomDeadDatanodes(); } + + @Override + public void setNumOfInMaintenanceLiveDataNodes(int nodes) { + this.translator.getBuilder().setNumOfInMaintenanceLiveDataNodes(nodes); + } + + @Override + public int getNumOfInMaintenanceLiveDataNodes() { + return this.translator.getProtoOrBuilder() + .getNumOfInMaintenanceLiveDataNodes(); + } + + @Override + public void setNumOfInMaintenanceDeadDataNodes(int nodes) { + this.translator.getBuilder().setNumOfInMaintenanceDeadDataNodes(nodes); + } + + @Override + public int getNumOfInMaintenanceDeadDataNodes() { + return this.translator.getProtoOrBuilder() + .getNumOfInMaintenanceDeadDataNodes(); + } + + @Override + public void setNumOfEnteringMaintenanceDataNodes(int nodes) { + this.translator.getBuilder().setNumOfEnteringMaintenanceDataNodes(nodes); + } + + @Override + public int 
getNumOfEnteringMaintenanceDataNodes() { + return this.translator.getProtoOrBuilder() + .getNumOfEnteringMaintenanceDataNodes(); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto index 9e9fd4899c..a55be731a7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto @@ -46,6 +46,9 @@ message NamenodeMembershipStatsRecordProto { optional uint32 numOfDecomActiveDatanodes = 23; optional uint32 numOfDecomDeadDatanodes = 24; optional uint32 numOfStaleDatanodes = 25; + optional uint32 numOfInMaintenanceLiveDataNodes = 26; + optional uint32 numOfInMaintenanceDeadDataNodes = 27; + optional uint32 numOfEnteringMaintenanceDataNodes = 28; } message NamenodeMembershipRecordProto { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestFederationMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestFederationMetrics.java index 5d984e8645..2c147ebf1d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestFederationMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestFederationMetrics.java @@ -145,6 +145,12 @@ public void testNamenodeStatsDataSource() throws IOException, JSONException { stats.getNumOfDecomActiveDatanodes()); assertEquals(json.getLong("numOfDecomDeadDatanodes"), stats.getNumOfDecomDeadDatanodes()); + assertEquals(json.getLong("numOfInMaintenanceLiveDataNodes"), + stats.getNumOfInMaintenanceLiveDataNodes()); + assertEquals(json.getLong("numOfInMaintenanceDeadDataNodes"), + stats.getNumOfInMaintenanceDeadDataNodes()); + 
assertEquals(json.getLong("numOfEnteringMaintenanceDataNodes"), + stats.getNumOfEnteringMaintenanceDataNodes()); assertEquals(json.getLong("numOfBlocks"), stats.getNumOfBlocks()); assertEquals(json.getString("rpcAddress"), mockEntry.getRpcAddress()); assertEquals(json.getString("webAddress"), mockEntry.getWebAddress()); @@ -197,6 +203,12 @@ public void testNameserviceStatsDataSource() json.getLong("numOfDecomActiveDatanodes")); assertEquals(stats.getNumOfDecomDeadDatanodes(), json.getLong("numOfDecomDeadDatanodes")); + assertEquals(stats.getNumOfInMaintenanceLiveDataNodes(), + json.getLong("numOfInMaintenanceLiveDataNodes")); + assertEquals(stats.getNumOfInMaintenanceDeadDataNodes(), + json.getLong("numOfInMaintenanceDeadDataNodes")); + assertEquals(stats.getNumOfEnteringMaintenanceDataNodes(), + json.getLong("numOfEnteringMaintenanceDataNodes")); assertEquals(stats.getProvidedSpace(), json.getLong("providedSpace")); nameservicesFound++; @@ -268,6 +280,9 @@ private void validateClusterStatsBean(FederationMBean bean) long numDecom = 0; long numDecomLive = 0; long numDecomDead = 0; + long numInMaintenanceLive = 0; + long numInMaintenanceDead = 0; + long numEnteringMaintenance = 0; long numFiles = 0; for (MembershipState mock : getActiveMemberships()) { MembershipStats stats = mock.getStats(); @@ -278,6 +293,9 @@ private void validateClusterStatsBean(FederationMBean bean) numDecom += stats.getNumOfDecommissioningDatanodes(); numDecomLive += stats.getNumOfDecomActiveDatanodes(); numDecomDead += stats.getNumOfDecomDeadDatanodes(); + numInMaintenanceLive += stats.getNumOfInMaintenanceLiveDataNodes(); + numInMaintenanceDead += stats.getNumOfInMaintenanceDeadDataNodes(); + numEnteringMaintenance += stats.getNumOfEnteringMaintenanceDataNodes(); } assertEquals(numBlocks, bean.getNumBlocks()); @@ -287,6 +305,10 @@ private void validateClusterStatsBean(FederationMBean bean) assertEquals(numDecom, bean.getNumDecommissioningNodes()); assertEquals(numDecomLive, bean.getNumDecomLiveNodes()); 
assertEquals(numDecomDead, bean.getNumDecomDeadNodes()); + assertEquals(numInMaintenanceLive, bean.getNumInMaintenanceLiveDataNodes()); + assertEquals(numInMaintenanceDead, bean.getNumInMaintenanceDeadDataNodes()); + assertEquals(numEnteringMaintenance, + bean.getNumEnteringMaintenanceDataNodes()); assertEquals(numFiles, bean.getNumFiles()); assertEquals(getActiveMemberships().size() + getStandbyMemberships().size(), bean.getNumNamenodes()); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestMembershipState.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestMembershipState.java index 1aac632784..df41f461e6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestMembershipState.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestMembershipState.java @@ -52,6 +52,9 @@ public class TestMembershipState { private static final int NUM_DECOM = 700; private static final int NUM_DECOM_ACTIVE = 800; private static final int NUM_DECOM_DEAD = 900; + private static final int NUM_MAIN_LIVE = 151; + private static final int NUM_MAIN_DEAD = 303; + private static final int NUM_ENTER_MAIN = 144; private static final long NUM_BLOCK_MISSING = 1000; private static final long TOTAL_SPACE = 1100; @@ -78,6 +81,9 @@ private MembershipState createRecord() throws IOException { stats.setNumOfDecommissioningDatanodes(NUM_DECOM); stats.setNumOfDecomActiveDatanodes(NUM_DECOM_ACTIVE); stats.setNumOfDecomDeadDatanodes(NUM_DECOM_DEAD); + stats.setNumOfInMaintenanceLiveDataNodes(NUM_MAIN_LIVE); + stats.setNumOfInMaintenanceDeadDataNodes(NUM_MAIN_DEAD); + stats.setNumOfEnteringMaintenanceDataNodes(NUM_ENTER_MAIN); stats.setNumOfBlocksMissing(NUM_BLOCK_MISSING); stats.setTotalSpace(TOTAL_SPACE); stats.setAvailableSpace(AVAILABLE_SPACE); 
@@ -107,6 +113,9 @@ private void validateRecord(MembershipState record) throws IOException { assertEquals(NUM_DECOM, stats.getNumOfDecommissioningDatanodes()); assertEquals(NUM_DECOM_ACTIVE, stats.getNumOfDecomActiveDatanodes()); assertEquals(NUM_DECOM_DEAD, stats.getNumOfDecomDeadDatanodes()); + assertEquals(NUM_MAIN_LIVE, stats.getNumOfInMaintenanceLiveDataNodes()); + assertEquals(NUM_MAIN_DEAD, stats.getNumOfInMaintenanceDeadDataNodes()); + assertEquals(NUM_ENTER_MAIN, stats.getNumOfEnteringMaintenanceDataNodes()); assertEquals(TOTAL_SPACE, stats.getTotalSpace()); assertEquals(AVAILABLE_SPACE, stats.getAvailableSpace()); }