From 1dc01e59af9a504e473d696be3d40e056db450ab Mon Sep 17 00:00:00 2001 From: Inigo Goiri Date: Wed, 2 Jan 2019 10:38:33 -0800 Subject: [PATCH] HDFS-14167. RBF: Add stale nodes to federation metrics. Contributed by Inigo Goiri. --- .../federation/metrics/FederationMBean.java | 6 ++++++ .../federation/metrics/FederationMetrics.java | 6 ++++++ .../metrics/NamenodeBeanMetrics.java | 7 ++++++- .../resolver/MembershipNamenodeResolver.java | 1 + .../resolver/NamenodeStatusReport.java | 18 +++++++++++++++--- .../router/NamenodeHeartbeatService.java | 1 + .../store/records/MembershipStats.java | 4 ++++ .../records/impl/pb/MembershipStatsPBImpl.java | 10 ++++++++++ .../src/main/proto/FederationProtocol.proto | 1 + .../metrics/TestFederationMetrics.java | 7 +++++++ .../store/records/TestMembershipState.java | 3 +++ 11 files changed, 60 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java index 79fb3e4326..b37f5efb94 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMBean.java @@ -106,6 +106,12 @@ public interface FederationMBean { */ int getNumDeadNodes(); + /** + * Get the number of stale datanodes. + * @return Number of stale datanodes. + */ + int getNumStaleNodes(); + /** * Get the number of decommissioning datanodes. * @return Number of decommissioning datanodes. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMetrics.java index 6a0a46e89e..b3fe6cc9e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/FederationMetrics.java @@ -413,6 +413,12 @@ public int getNumDeadNodes() { return getNameserviceAggregatedInt(MembershipStats::getNumOfDeadDatanodes); } + @Override + public int getNumStaleNodes() { + return getNameserviceAggregatedInt( + MembershipStats::getNumOfStaleDatanodes); + } + @Override public int getNumDecommissioningNodes() { return getNameserviceAggregatedInt( diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java index a05fdc1449..1b97b1aa9c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java @@ -631,7 +631,12 @@ public int getNumDeadDataNodes() { @Override public int getNumStaleDataNodes() { - return -1; + try { + return getFederationMetrics().getNumStaleNodes(); + } catch (IOException e) { + LOG.debug("Failed to get number of stale nodes: {}", e.getMessage()); + } + return 0; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java 
b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java index 2707304f60..178db1b3dd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MembershipNamenodeResolver.java @@ -280,6 +280,7 @@ public boolean registerNamenode(NamenodeStatusReport report) report.getNumDecommissioningDatanodes()); stats.setNumOfActiveDatanodes(report.getNumLiveDatanodes()); stats.setNumOfDeadDatanodes(report.getNumDeadDatanodes()); + stats.setNumOfStaleDatanodes(report.getNumStaleDatanodes()); stats.setNumOfDecomActiveDatanodes(report.getNumDecomLiveDatanodes()); stats.setNumOfDecomDeadDatanodes(report.getNumDecomDeadDatanodes()); record.setStats(stats); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/NamenodeStatusReport.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/NamenodeStatusReport.java index b121e24a0e..5b603facb0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/NamenodeStatusReport.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/NamenodeStatusReport.java @@ -42,6 +42,7 @@ public class NamenodeStatusReport { /** Datanodes stats. */ private int liveDatanodes = -1; private int deadDatanodes = -1; + private int staleDatanodes = -1; /** Decommissioning datanodes. */ private int decomDatanodes = -1; /** Live decommissioned datanodes. */ @@ -223,14 +224,16 @@ public boolean getSafemode() { * * @param numLive Number of live nodes. * @param numDead Number of dead nodes. + * @param numStale Number of stale nodes. * @param numDecom Number of decommissioning nodes. 
* @param numLiveDecom Number of decommissioned live nodes. * @param numDeadDecom Number of decommissioned dead nodes. */ - public void setDatanodeInfo(int numLive, int numDead, int numDecom, - int numLiveDecom, int numDeadDecom) { + public void setDatanodeInfo(int numLive, int numDead, int numStale, + int numDecom, int numLiveDecom, int numDeadDecom) { this.liveDatanodes = numLive; this.deadDatanodes = numDead; + this.staleDatanodes = numStale; this.decomDatanodes = numDecom; this.liveDecomDatanodes = numLiveDecom; this.deadDecomDatanodes = numDeadDecom; @@ -247,7 +250,7 @@ public int getNumLiveDatanodes() { } /** - * Get the number of dead blocks. + * Get the number of dead nodes. * * @return The number of dead nodes. */ @@ -255,6 +258,15 @@ public int getNumDeadDatanodes() { return this.deadDatanodes; } + /** + * Get the number of stale nodes. + * + * @return The number of stale nodes. + */ + public int getNumStaleDatanodes() { + return this.staleDatanodes; + } + /** * Get the number of decommissionining nodes. 
* diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java index 871ebaf9ee..475e90d798 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/NamenodeHeartbeatService.java @@ -338,6 +338,7 @@ private void updateJMXParameters( report.setDatanodeInfo( jsonObject.getInt("NumLiveDataNodes"), jsonObject.getInt("NumDeadDataNodes"), + jsonObject.getInt("NumStaleDataNodes"), jsonObject.getInt("NumDecommissioningDataNodes"), jsonObject.getInt("NumDecomLiveDataNodes"), jsonObject.getInt("NumDecomDeadDataNodes")); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipStats.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipStats.java index 654140cbd5..d452cd2c40 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipStats.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/MembershipStats.java @@ -81,6 +81,10 @@ public static MembershipStats newInstance() throws IOException { public abstract int getNumOfDeadDatanodes(); + public abstract void setNumOfStaleDatanodes(int nodes); + + public abstract int getNumOfStaleDatanodes(); + public abstract void setNumOfDecommissioningDatanodes(int nodes); public abstract int getNumOfDecommissioningDatanodes(); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MembershipStatsPBImpl.java 
b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MembershipStatsPBImpl.java index 3347bc677f..50ecbf3d48 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MembershipStatsPBImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MembershipStatsPBImpl.java @@ -168,6 +168,16 @@ public int getNumOfDeadDatanodes() { return this.translator.getProtoOrBuilder().getNumOfDeadDatanodes(); } + @Override + public void setNumOfStaleDatanodes(int nodes) { + this.translator.getBuilder().setNumOfStaleDatanodes(nodes); + } + + @Override + public int getNumOfStaleDatanodes() { + return this.translator.getProtoOrBuilder().getNumOfStaleDatanodes(); + } + @Override public void setNumOfDecommissioningDatanodes(int nodes) { this.translator.getBuilder().setNumOfDecommissioningDatanodes(nodes); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto index 17ae299bcd..1e5e37b3e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto @@ -45,6 +45,7 @@ message NamenodeMembershipStatsRecordProto { optional uint32 numOfDecommissioningDatanodes = 22; optional uint32 numOfDecomActiveDatanodes = 23; optional uint32 numOfDecomDeadDatanodes = 24; + optional uint32 numOfStaleDatanodes = 25; } message NamenodeMembershipRecordProto { diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestFederationMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestFederationMetrics.java index 94799f35d9..5d984e8645 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestFederationMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/metrics/TestFederationMetrics.java @@ -137,6 +137,8 @@ public void testNamenodeStatsDataSource() throws IOException, JSONException { stats.getNumOfActiveDatanodes()); assertEquals(json.getLong("numOfDeadDatanodes"), stats.getNumOfDeadDatanodes()); + assertEquals(json.getLong("numOfStaleDatanodes"), + stats.getNumOfStaleDatanodes()); assertEquals(json.getLong("numOfDecommissioningDatanodes"), stats.getNumOfDecommissioningDatanodes()); assertEquals(json.getLong("numOfDecomActiveDatanodes"), @@ -187,6 +189,8 @@ public void testNameserviceStatsDataSource() json.getLong("numOfActiveDatanodes")); assertEquals(stats.getNumOfDeadDatanodes(), json.getLong("numOfDeadDatanodes")); + assertEquals(stats.getNumOfStaleDatanodes(), + json.getLong("numOfStaleDatanodes")); assertEquals(stats.getNumOfDecommissioningDatanodes(), json.getLong("numOfDecommissioningDatanodes")); assertEquals(stats.getNumOfDecomActiveDatanodes(), @@ -260,6 +264,7 @@ private void validateClusterStatsBean(FederationMBean bean) long numBlocks = 0; long numLive = 0; long numDead = 0; + long numStale = 0; long numDecom = 0; long numDecomLive = 0; long numDecomDead = 0; @@ -269,6 +274,7 @@ private void validateClusterStatsBean(FederationMBean bean) numBlocks += stats.getNumOfBlocks(); numLive += stats.getNumOfActiveDatanodes(); numDead += stats.getNumOfDeadDatanodes(); + numStale += stats.getNumOfStaleDatanodes(); numDecom += stats.getNumOfDecommissioningDatanodes(); numDecomLive += stats.getNumOfDecomActiveDatanodes(); numDecomDead += stats.getNumOfDecomDeadDatanodes(); @@ -277,6 +283,7 @@ private void validateClusterStatsBean(FederationMBean bean) assertEquals(numBlocks, bean.getNumBlocks()); assertEquals(numLive, bean.getNumLiveNodes()); assertEquals(numDead, bean.getNumDeadNodes()); + 
assertEquals(numStale, bean.getNumStaleNodes()); assertEquals(numDecom, bean.getNumDecommissioningNodes()); assertEquals(numDecomLive, bean.getNumDecomLiveNodes()); assertEquals(numDecomDead, bean.getNumDecomDeadNodes()); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestMembershipState.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestMembershipState.java index d922414b8a..1aac632784 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestMembershipState.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/TestMembershipState.java @@ -47,6 +47,7 @@ public class TestMembershipState { private static final long NUM_BLOCKS = 300; private static final long NUM_FILES = 400; private static final int NUM_DEAD = 500; + private static final int NUM_STALE = 550; private static final int NUM_ACTIVE = 600; private static final int NUM_DECOM = 700; private static final int NUM_DECOM_ACTIVE = 800; @@ -73,6 +74,7 @@ private MembershipState createRecord() throws IOException { stats.setNumOfFiles(NUM_FILES); stats.setNumOfActiveDatanodes(NUM_ACTIVE); stats.setNumOfDeadDatanodes(NUM_DEAD); + stats.setNumOfStaleDatanodes(NUM_STALE); stats.setNumOfDecommissioningDatanodes(NUM_DECOM); stats.setNumOfDecomActiveDatanodes(NUM_DECOM_ACTIVE); stats.setNumOfDecomDeadDatanodes(NUM_DECOM_DEAD); @@ -101,6 +103,7 @@ private void validateRecord(MembershipState record) throws IOException { assertEquals(NUM_FILES, stats.getNumOfFiles()); assertEquals(NUM_ACTIVE, stats.getNumOfActiveDatanodes()); assertEquals(NUM_DEAD, stats.getNumOfDeadDatanodes()); + assertEquals(NUM_STALE, stats.getNumOfStaleDatanodes()); assertEquals(NUM_DECOM, stats.getNumOfDecommissioningDatanodes()); assertEquals(NUM_DECOM_ACTIVE, 
stats.getNumOfDecomActiveDatanodes()); assertEquals(NUM_DECOM_DEAD, stats.getNumOfDecomDeadDatanodes());