HDFS-11468. Ozone: SCM: Add Node Metrics for SCM. Contributed by Yiqun Lin.
This commit is contained in:
parent
393a02d8e1
commit
6783dad766
@ -69,6 +69,8 @@
|
||||
import org.apache.hadoop.ozone.scm.block.BlockManagerImpl;
|
||||
import org.apache.hadoop.ozone.scm.container.ContainerMapping;
|
||||
import org.apache.hadoop.ozone.scm.container.Mapping;
|
||||
import org.apache.hadoop.ozone.scm.container.placement.metrics.ContainerStat;
|
||||
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMMetrics;
|
||||
import org.apache.hadoop.ozone.scm.exceptions.SCMException;
|
||||
import org.apache.hadoop.ozone.scm.node.NodeManager;
|
||||
import org.apache.hadoop.ozone.scm.node.SCMNodeManager;
|
||||
@ -163,6 +165,9 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
|
||||
private final String scmUsername;
|
||||
private final Collection<String> scmAdminUsernames;
|
||||
|
||||
/** SCM metrics. */
|
||||
private static SCMMetrics metrics;
|
||||
|
||||
/**
|
||||
* Creates a new StorageContainerManager. Configuration will be updated with
|
||||
* information on the actual listening addresses used for RPC servers.
|
||||
@ -177,6 +182,7 @@ public StorageContainerManager(OzoneConfiguration conf)
|
||||
final int cacheSize = conf.getInt(OZONE_SCM_DB_CACHE_SIZE_MB,
|
||||
OZONE_SCM_DB_CACHE_SIZE_DEFAULT);
|
||||
|
||||
StorageContainerManager.initMetrics();
|
||||
// TODO : Fix the ClusterID generation code.
|
||||
scmNodeManager = new SCMNodeManager(conf, UUID.randomUUID().toString());
|
||||
scmContainerManager = new ContainerMapping(conf, scmNodeManager, cacheSize);
|
||||
@ -673,6 +679,7 @@ public void stop() {
|
||||
LOG.error("SCM block manager service stop failed.", ex);
|
||||
}
|
||||
|
||||
metrics.unRegister();
|
||||
unregisterMXBean();
|
||||
IOUtils.cleanupWithLogger(LOG, scmContainerManager);
|
||||
IOUtils.cleanupWithLogger(LOG, scmBlockManager);
|
||||
@ -752,6 +759,27 @@ public SCMHeartbeatResponseProto sendHeartbeat(DatanodeID datanodeID,
|
||||
@Override
|
||||
public ContainerReportsResponseProto sendContainerReport(
|
||||
ContainerReportsRequestProto reports) throws IOException {
|
||||
// TODO: We should update the logic once incremental container report
|
||||
// type is supported.
|
||||
if (reports.getType() ==
|
||||
ContainerReportsRequestProto.reportType.fullReport) {
|
||||
ContainerStat stat = new ContainerStat();
|
||||
for (StorageContainerDatanodeProtocolProtos.ContainerInfo info : reports
|
||||
.getReportsList()) {
|
||||
stat.add(new ContainerStat(info.getSize(), info.getUsed(),
|
||||
info.getKeyCount(), info.getReadBytes(), info.getWriteBytes(),
|
||||
info.getReadCount(), info.getWriteCount()));
|
||||
}
|
||||
|
||||
// update container metrics
|
||||
metrics.setLastContainerReportSize(stat.getSize().get());
|
||||
metrics.setLastContainerReportUsed(stat.getUsed().get());
|
||||
metrics.setLastContainerReportKeyCount(stat.getKeyCount().get());
|
||||
metrics.setLastContainerReportReadBytes(stat.getReadBytes().get());
|
||||
metrics.setLastContainerReportWriteBytes(stat.getWriteBytes().get());
|
||||
metrics.setLastContainerReportReadCount(stat.getReadCount().get());
|
||||
metrics.setLastContainerReportWriteCount(stat.getWriteCount().get());
|
||||
}
|
||||
|
||||
// TODO: handle the container reports either here or add container report
|
||||
// handler.
|
||||
@ -914,4 +942,17 @@ private void checkAdminAccess() throws IOException {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize SCM metrics.
|
||||
*/
|
||||
public static void initMetrics() {
|
||||
metrics = SCMMetrics.create();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return SCM metrics instance.
|
||||
*/
|
||||
public static SCMMetrics getMetrics() {
|
||||
return metrics == null ? SCMMetrics.create() : metrics;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,128 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.ozone.scm.container.placement.metrics;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
/**
|
||||
* This class represents the SCM container stat.
|
||||
*/
|
||||
public class ContainerStat {
|
||||
/**
|
||||
* The maximum container size.
|
||||
*/
|
||||
private LongMetric size;
|
||||
|
||||
/**
|
||||
* The number of bytes used by the container.
|
||||
*/
|
||||
private LongMetric used;
|
||||
|
||||
/**
|
||||
* The number of keys in the container.
|
||||
*/
|
||||
private LongMetric keyCount;
|
||||
|
||||
/**
|
||||
* The number of bytes read from the container.
|
||||
*/
|
||||
private LongMetric readBytes;
|
||||
|
||||
/**
|
||||
* The number of bytes write into the container.
|
||||
*/
|
||||
private LongMetric writeBytes;
|
||||
|
||||
/**
|
||||
* The number of times the container is read.
|
||||
*/
|
||||
private LongMetric readCount;
|
||||
|
||||
/**
|
||||
* The number of times the container is written into .
|
||||
*/
|
||||
private LongMetric writeCount;
|
||||
|
||||
public ContainerStat() {
|
||||
this(0L, 0L, 0L, 0L, 0L, 0L, 0L);
|
||||
}
|
||||
|
||||
public ContainerStat(long size, long used, long keyCount, long readBytes,
|
||||
long writeBytes, long readCount, long writeCount) {
|
||||
Preconditions.checkArgument(size >= 0,
|
||||
"Container size cannot be " + "negative.");
|
||||
Preconditions.checkArgument(used >= 0,
|
||||
"Used space cannot be " + "negative.");
|
||||
Preconditions.checkArgument(keyCount >= 0,
|
||||
"Key count cannot be " + "negative");
|
||||
Preconditions.checkArgument(readBytes >= 0,
|
||||
"Read bytes read cannot be " + "negative.");
|
||||
Preconditions.checkArgument(readBytes >= 0,
|
||||
"Write bytes cannot be " + "negative.");
|
||||
Preconditions.checkArgument(readCount >= 0,
|
||||
"Read count cannot be " + "negative.");
|
||||
Preconditions.checkArgument(writeCount >= 0,
|
||||
"Write count cannot be " + "negative");
|
||||
|
||||
this.size = new LongMetric(size);
|
||||
this.used = new LongMetric(used);
|
||||
this.keyCount = new LongMetric(keyCount);
|
||||
this.readBytes = new LongMetric(readBytes);
|
||||
this.writeBytes = new LongMetric(writeBytes);
|
||||
this.readCount = new LongMetric(readCount);
|
||||
this.writeCount = new LongMetric(writeCount);
|
||||
}
|
||||
|
||||
public LongMetric getSize() {
|
||||
return size;
|
||||
}
|
||||
|
||||
public LongMetric getUsed() {
|
||||
return used;
|
||||
}
|
||||
|
||||
public LongMetric getKeyCount() {
|
||||
return keyCount;
|
||||
}
|
||||
|
||||
public LongMetric getReadBytes() {
|
||||
return readBytes;
|
||||
}
|
||||
|
||||
public LongMetric getWriteBytes() {
|
||||
return writeBytes;
|
||||
}
|
||||
|
||||
public LongMetric getReadCount() {
|
||||
return readCount;
|
||||
}
|
||||
|
||||
public LongMetric getWriteCount() {
|
||||
return writeCount;
|
||||
}
|
||||
|
||||
public void add(ContainerStat stat) {
|
||||
this.size.add(stat.getSize().get());
|
||||
this.used.add(stat.getUsed().get());
|
||||
this.keyCount.add(stat.getKeyCount().get());
|
||||
this.readBytes.add(stat.getReadBytes().get());
|
||||
this.writeBytes.add(stat.getWriteBytes().get());
|
||||
this.readCount.add(stat.getReadCount().get());
|
||||
this.writeCount.add(stat.getWriteCount().get());
|
||||
}
|
||||
}
|
@ -0,0 +1,87 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.ozone.scm.container.placement.metrics;
|
||||
|
||||
import org.apache.hadoop.metrics2.MetricsSystem;
|
||||
import org.apache.hadoop.metrics2.annotation.Metric;
|
||||
import org.apache.hadoop.metrics2.annotation.Metrics;
|
||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
|
||||
|
||||
/**
|
||||
* This class is for maintaining StorageContainerManager statistics.
|
||||
*/
|
||||
@Metrics(about="Storage Container Manager Metrics", context="dfs")
|
||||
public class SCMMetrics {
|
||||
public static final String SOURCE_NAME =
|
||||
SCMMetrics.class.getSimpleName();
|
||||
|
||||
/**
|
||||
* Container stat metrics, the meaning of following metrics
|
||||
* can be found in {@link ContainerStat}.
|
||||
*/
|
||||
@Metric private MutableGaugeLong lastContainerReportSize;
|
||||
@Metric private MutableGaugeLong lastContainerReportUsed;
|
||||
@Metric private MutableGaugeLong lastContainerReportKeyCount;
|
||||
@Metric private MutableGaugeLong lastContainerReportReadBytes;
|
||||
@Metric private MutableGaugeLong lastContainerReportWriteBytes;
|
||||
@Metric private MutableGaugeLong lastContainerReportReadCount;
|
||||
@Metric private MutableGaugeLong lastContainerReportWriteCount;
|
||||
|
||||
public SCMMetrics() {
|
||||
}
|
||||
|
||||
public static SCMMetrics create() {
|
||||
MetricsSystem ms = DefaultMetricsSystem.instance();
|
||||
return ms.register(SOURCE_NAME, "Storage Container Manager Metrics",
|
||||
new SCMMetrics());
|
||||
}
|
||||
|
||||
public void setLastContainerReportSize(long size) {
|
||||
this.lastContainerReportSize.set(size);
|
||||
}
|
||||
|
||||
public void setLastContainerReportUsed(long used) {
|
||||
this.lastContainerReportUsed.set(used);
|
||||
}
|
||||
|
||||
public void setLastContainerReportKeyCount(long keyCount) {
|
||||
this.lastContainerReportKeyCount.set(keyCount);
|
||||
}
|
||||
|
||||
public void setLastContainerReportReadBytes(long readBytes) {
|
||||
this.lastContainerReportReadBytes.set(readBytes);
|
||||
}
|
||||
|
||||
public void setLastContainerReportWriteBytes(long writeBytes) {
|
||||
this.lastContainerReportWriteBytes.set(writeBytes);
|
||||
}
|
||||
|
||||
public void setLastContainerReportReadCount(long readCount) {
|
||||
this.lastContainerReportReadCount.set(readCount);
|
||||
}
|
||||
|
||||
public void setLastContainerReportWriteCount(long writeCount) {
|
||||
this.lastContainerReportWriteCount.set(writeCount);
|
||||
}
|
||||
|
||||
public void unRegister() {
|
||||
MetricsSystem ms = DefaultMetricsSystem.instance();
|
||||
ms.unregisterSource(SOURCE_NAME);
|
||||
}
|
||||
}
|
@ -98,6 +98,26 @@ RPC operations.
|
||||
| `GetSmallFile` | Get small file operations |
|
||||
| `CloseContainer` | Close container operations |
|
||||
|
||||
### Storage Container Manager Metrics
|
||||
|
||||
The metrics for containers that are managed by the Storage Container Manager.
|
||||
|
||||
Storage Container Manager (SCM) is a master service which keeps track of
|
||||
replicas of storage containers. It also manages all data nodes and their
|
||||
states, dealing with container reports and dispatching commands for execution.
|
||||
|
||||
The following gauges are set from the most recent full container report:
|
||||
|
||||
| Name | Description |
|
||||
|:---- |:---- |
|
||||
| `LastContainerReportSize` | Total size in bytes of all containers |
|
||||
| `LastContainerReportUsed` | Total number of bytes used by all containers |
|
||||
| `LastContainerReportKeyCount` | Total number of keys in all containers |
|
||||
| `LastContainerReportReadBytes` | Total number of bytes that have been read from all containers |
|
||||
| `LastContainerReportWriteBytes` | Total number of bytes that have been written into all containers |
|
||||
| `LastContainerReportReadCount` | Total number of times containers have been read from |
|
||||
| `LastContainerReportWriteCount` | Total number of times containers have been written to |
|
||||
|
||||
### Key Space Metrics
|
||||
|
||||
The metrics for various key space manager operations in HDFS Ozone.
|
||||
|
@ -0,0 +1,116 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.ozone.scm;
|
||||
|
||||
import static org.apache.hadoop.test.MetricsAsserts.getLongGauge;
|
||||
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.util.UUID;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.hadoop.conf.OzoneConfiguration;
|
||||
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
|
||||
import org.apache.hadoop.ozone.MiniOzoneCluster;
|
||||
import org.apache.hadoop.ozone.OzoneConsts;
|
||||
import org.apache.hadoop.ozone.container.common.SCMTestUtils;
|
||||
import org.apache.hadoop.ozone.container.common.helpers.ContainerReport;
|
||||
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos;
|
||||
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
|
||||
import org.apache.hadoop.ozone.scm.container.placement.metrics.ContainerStat;
|
||||
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMMetrics;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* This class tests the metrics of Storage Container Manager.
|
||||
*/
|
||||
public class TestSCMMetrics {
|
||||
private static MiniOzoneCluster cluster = null;
|
||||
|
||||
@Test
|
||||
public void testContainerMetrics() throws Exception {
|
||||
int nodeCount = 2;
|
||||
int numReport = 2;
|
||||
long size = OzoneConsts.GB * 5;
|
||||
long used = OzoneConsts.GB * 2;
|
||||
long readBytes = OzoneConsts.GB * 1;
|
||||
long writeBytes = OzoneConsts.GB * 2;
|
||||
int keyCount = 1000;
|
||||
int readCount = 100;
|
||||
int writeCount = 50;
|
||||
OzoneConfiguration conf = new OzoneConfiguration();
|
||||
|
||||
try {
|
||||
cluster = new MiniOzoneCluster.Builder(conf)
|
||||
.setHandlerType(OzoneConsts.OZONE_HANDLER_DISTRIBUTED)
|
||||
.numDataNodes(nodeCount).build();
|
||||
|
||||
ContainerStat stat = new ContainerStat(size, used, keyCount, readBytes,
|
||||
writeBytes, readCount, writeCount);
|
||||
StorageContainerManager scmManager = cluster.getStorageContainerManager();
|
||||
scmManager.sendContainerReport(createContainerReport(numReport, stat));
|
||||
|
||||
// verify container stat metrics
|
||||
MetricsRecordBuilder scmMetrics = getMetrics(SCMMetrics.SOURCE_NAME);
|
||||
assertEquals(size * numReport,
|
||||
getLongGauge("LastContainerReportSize", scmMetrics));
|
||||
assertEquals(used * numReport,
|
||||
getLongGauge("LastContainerReportUsed", scmMetrics));
|
||||
assertEquals(readBytes * numReport,
|
||||
getLongGauge("LastContainerReportReadBytes", scmMetrics));
|
||||
assertEquals(writeBytes * numReport,
|
||||
getLongGauge("LastContainerReportWriteBytes", scmMetrics));
|
||||
|
||||
assertEquals(keyCount * numReport,
|
||||
getLongGauge("LastContainerReportKeyCount", scmMetrics));
|
||||
assertEquals(readCount * numReport,
|
||||
getLongGauge("LastContainerReportReadCount", scmMetrics));
|
||||
assertEquals(writeCount * numReport,
|
||||
getLongGauge("LastContainerReportWriteCount", scmMetrics));
|
||||
} finally {
|
||||
if (cluster != null) {
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private ContainerReportsRequestProto createContainerReport(int numReport,
|
||||
ContainerStat stat) {
|
||||
StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto.Builder
|
||||
reportsBuilder = StorageContainerDatanodeProtocolProtos
|
||||
.ContainerReportsRequestProto.newBuilder();
|
||||
|
||||
for (int i = 0; i < numReport; i++) {
|
||||
ContainerReport report = new ContainerReport(
|
||||
UUID.randomUUID().toString(), DigestUtils.sha256Hex("Simulated"));
|
||||
report.setSize(stat.getSize().get());
|
||||
report.setBytesUsed(stat.getUsed().get());
|
||||
report.setReadCount(stat.getReadCount().get());
|
||||
report.setReadBytes(stat.getReadBytes().get());
|
||||
report.setKeyCount(stat.getKeyCount().get());
|
||||
report.setWriteCount(stat.getWriteCount().get());
|
||||
report.setWriteBytes(stat.getWriteBytes().get());
|
||||
reportsBuilder.addReports(report.getProtoBufMessage());
|
||||
}
|
||||
reportsBuilder.setDatanodeID(SCMTestUtils.getDatanodeID()
|
||||
.getProtoBufMessage());
|
||||
reportsBuilder.setType(StorageContainerDatanodeProtocolProtos
|
||||
.ContainerReportsRequestProto.reportType.fullReport);
|
||||
return reportsBuilder.build();
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user