HDFS-11468. Ozone: SCM: Add Node Metrics for SCM. Contributed by Yiqun Lin.

This commit is contained in:
Yiqun Lin 2017-10-25 10:34:47 +08:00 committed by Owen O'Malley
parent 393a02d8e1
commit 6783dad766
5 changed files with 392 additions and 0 deletions

View File

@ -69,6 +69,8 @@
import org.apache.hadoop.ozone.scm.block.BlockManagerImpl;
import org.apache.hadoop.ozone.scm.container.ContainerMapping;
import org.apache.hadoop.ozone.scm.container.Mapping;
import org.apache.hadoop.ozone.scm.container.placement.metrics.ContainerStat;
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMMetrics;
import org.apache.hadoop.ozone.scm.exceptions.SCMException;
import org.apache.hadoop.ozone.scm.node.NodeManager;
import org.apache.hadoop.ozone.scm.node.SCMNodeManager;
@ -163,6 +165,9 @@ public class StorageContainerManager extends ServiceRuntimeInfoImpl
private final String scmUsername;
private final Collection<String> scmAdminUsernames;
/** SCM metrics. */
private static SCMMetrics metrics;
/**
* Creates a new StorageContainerManager. Configuration will be updated with
* information on the actual listening addresses used for RPC servers.
@ -177,6 +182,7 @@ public StorageContainerManager(OzoneConfiguration conf)
final int cacheSize = conf.getInt(OZONE_SCM_DB_CACHE_SIZE_MB,
OZONE_SCM_DB_CACHE_SIZE_DEFAULT);
StorageContainerManager.initMetrics();
// TODO : Fix the ClusterID generation code.
scmNodeManager = new SCMNodeManager(conf, UUID.randomUUID().toString());
scmContainerManager = new ContainerMapping(conf, scmNodeManager, cacheSize);
@ -673,6 +679,7 @@ public void stop() {
LOG.error("SCM block manager service stop failed.", ex);
}
metrics.unRegister();
unregisterMXBean();
IOUtils.cleanupWithLogger(LOG, scmContainerManager);
IOUtils.cleanupWithLogger(LOG, scmBlockManager);
@ -752,6 +759,27 @@ public SCMHeartbeatResponseProto sendHeartbeat(DatanodeID datanodeID,
@Override
public ContainerReportsResponseProto sendContainerReport(
ContainerReportsRequestProto reports) throws IOException {
// TODO: We should update the logic once incremental container report
// type is supported.
if (reports.getType() ==
ContainerReportsRequestProto.reportType.fullReport) {
ContainerStat stat = new ContainerStat();
for (StorageContainerDatanodeProtocolProtos.ContainerInfo info : reports
.getReportsList()) {
stat.add(new ContainerStat(info.getSize(), info.getUsed(),
info.getKeyCount(), info.getReadBytes(), info.getWriteBytes(),
info.getReadCount(), info.getWriteCount()));
}
// update container metrics
metrics.setLastContainerReportSize(stat.getSize().get());
metrics.setLastContainerReportUsed(stat.getUsed().get());
metrics.setLastContainerReportKeyCount(stat.getKeyCount().get());
metrics.setLastContainerReportReadBytes(stat.getReadBytes().get());
metrics.setLastContainerReportWriteBytes(stat.getWriteBytes().get());
metrics.setLastContainerReportReadCount(stat.getReadCount().get());
metrics.setLastContainerReportWriteCount(stat.getWriteCount().get());
}
// TODO: handle the container reports either here or add container report
// handler.
@ -914,4 +942,17 @@ private void checkAdminAccess() throws IOException {
}
}
/**
* Initialize SCM metrics.
*/
public static void initMetrics() {
metrics = SCMMetrics.create();
}
/**
* Return SCM metrics instance.
*/
public static SCMMetrics getMetrics() {
return metrics == null ? SCMMetrics.create() : metrics;
}
}

View File

@ -0,0 +1,128 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.scm.container.placement.metrics;
import com.google.common.base.Preconditions;
/**
* This class represents the SCM container stat.
*/
public class ContainerStat {
/**
* The maximum container size.
*/
private LongMetric size;
/**
* The number of bytes used by the container.
*/
private LongMetric used;
/**
* The number of keys in the container.
*/
private LongMetric keyCount;
/**
* The number of bytes read from the container.
*/
private LongMetric readBytes;
/**
* The number of bytes written into the container.
*/
private LongMetric writeBytes;
/**
* The number of times the container is read.
*/
private LongMetric readCount;
/**
* The number of times the container is written to.
*/
private LongMetric writeCount;
public ContainerStat() {
this(0L, 0L, 0L, 0L, 0L, 0L, 0L);
}
public ContainerStat(long size, long used, long keyCount, long readBytes,
long writeBytes, long readCount, long writeCount) {
Preconditions.checkArgument(size >= 0,
"Container size cannot be negative.");
Preconditions.checkArgument(used >= 0,
"Used space cannot be negative.");
Preconditions.checkArgument(keyCount >= 0,
"Key count cannot be negative.");
Preconditions.checkArgument(readBytes >= 0,
"Read bytes cannot be negative.");
Preconditions.checkArgument(writeBytes >= 0,
"Write bytes cannot be negative.");
Preconditions.checkArgument(readCount >= 0,
"Read count cannot be negative.");
Preconditions.checkArgument(writeCount >= 0,
"Write count cannot be negative.");
this.size = new LongMetric(size);
this.used = new LongMetric(used);
this.keyCount = new LongMetric(keyCount);
this.readBytes = new LongMetric(readBytes);
this.writeBytes = new LongMetric(writeBytes);
this.readCount = new LongMetric(readCount);
this.writeCount = new LongMetric(writeCount);
}
public LongMetric getSize() {
return size;
}
public LongMetric getUsed() {
return used;
}
public LongMetric getKeyCount() {
return keyCount;
}
public LongMetric getReadBytes() {
return readBytes;
}
public LongMetric getWriteBytes() {
return writeBytes;
}
public LongMetric getReadCount() {
return readCount;
}
public LongMetric getWriteCount() {
return writeCount;
}
public void add(ContainerStat stat) {
this.size.add(stat.getSize().get());
this.used.add(stat.getUsed().get());
this.keyCount.add(stat.getKeyCount().get());
this.readBytes.add(stat.getReadBytes().get());
this.writeBytes.add(stat.getWriteBytes().get());
this.readCount.add(stat.getReadCount().get());
this.writeCount.add(stat.getWriteCount().get());
}
}
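
As a usage note, ContainerStat is an aggregate: the report handler above starts from an empty instance and folds every container of a full report into it with add(). A minimal, self-contained sketch of that pattern, with made-up container values:

import org.apache.hadoop.ozone.scm.container.placement.metrics.ContainerStat;

/**
 * Sketch only: mirrors how sendContainerReport() aggregates a full report.
 * The container numbers below are invented for illustration.
 */
public class ContainerStatAggregationSketch {
  public static void main(String[] args) {
    // Start from an empty aggregate, as the report handler does.
    ContainerStat aggregate = new ContainerStat();

    // Two hypothetical containers from one full report:
    // (size, used, keyCount, readBytes, writeBytes, readCount, writeCount)
    aggregate.add(new ContainerStat(
        5L << 30, 2L << 30, 1000, 1L << 30, 2L << 30, 100, 50));
    aggregate.add(new ContainerStat(
        5L << 30, 1L << 30, 500, 0, 1L << 30, 10, 20));

    // These totals are what the SCM gauges are set to.
    System.out.println("used bytes = " + aggregate.getUsed().get());
    System.out.println("key count  = " + aggregate.getKeyCount().get());
  }
}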

View File

@ -0,0 +1,87 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.scm.container.placement.metrics;
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
/**
* This class is for maintaining StorageContainerManager statistics.
*/
@Metrics(about="Storage Container Manager Metrics", context="dfs")
public class SCMMetrics {
public static final String SOURCE_NAME =
SCMMetrics.class.getSimpleName();
/**
* Container stat metrics; the meaning of the following gauges
* can be found in {@link ContainerStat}.
*/
@Metric private MutableGaugeLong lastContainerReportSize;
@Metric private MutableGaugeLong lastContainerReportUsed;
@Metric private MutableGaugeLong lastContainerReportKeyCount;
@Metric private MutableGaugeLong lastContainerReportReadBytes;
@Metric private MutableGaugeLong lastContainerReportWriteBytes;
@Metric private MutableGaugeLong lastContainerReportReadCount;
@Metric private MutableGaugeLong lastContainerReportWriteCount;
public SCMMetrics() {
}
public static SCMMetrics create() {
MetricsSystem ms = DefaultMetricsSystem.instance();
return ms.register(SOURCE_NAME, "Storage Container Manager Metrics",
new SCMMetrics());
}
public void setLastContainerReportSize(long size) {
this.lastContainerReportSize.set(size);
}
public void setLastContainerReportUsed(long used) {
this.lastContainerReportUsed.set(used);
}
public void setLastContainerReportKeyCount(long keyCount) {
this.lastContainerReportKeyCount.set(keyCount);
}
public void setLastContainerReportReadBytes(long readBytes) {
this.lastContainerReportReadBytes.set(readBytes);
}
public void setLastContainerReportWriteBytes(long writeBytes) {
this.lastContainerReportWriteBytes.set(writeBytes);
}
public void setLastContainerReportReadCount(long readCount) {
this.lastContainerReportReadCount.set(readCount);
}
public void setLastContainerReportWriteCount(long writeCount) {
this.lastContainerReportWriteCount.set(writeCount);
}
public void unRegister() {
MetricsSystem ms = DefaultMetricsSystem.instance();
ms.unregisterSource(SOURCE_NAME);
}
}
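
The source registers itself with the default metrics system under SOURCE_NAME ("SCMMetrics") and is dropped again from StorageContainerManager#stop(). A minimal sketch of that register, update, unregister cycle; the DefaultMetricsSystem prefix used here is only for illustration, not necessarily the one SCM uses:

import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMMetrics;

/**
 * Sketch only: shows the lifecycle wired up by initMetrics() and stop()
 * in StorageContainerManager. The metrics-system prefix is illustrative.
 */
public class ScmMetricsLifecycleSketch {
  public static void main(String[] args) {
    // Bring up a metrics system for the sketch.
    DefaultMetricsSystem.initialize("StorageContainerManagerSketch");

    // Register the gauge source, as StorageContainerManager.initMetrics() does.
    SCMMetrics metrics = SCMMetrics.create();

    // Each full container report simply overwrites the gauges with new totals.
    metrics.setLastContainerReportSize(10L << 30);
    metrics.setLastContainerReportKeyCount(1500L);

    // Mirror StorageContainerManager#stop(): unregister the source, then shut down.
    metrics.unRegister();
    DefaultMetricsSystem.shutdown();
  }
}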

View File

@ -98,6 +98,26 @@ RPC operations.
| `GetSmallFile` | Get small file operations |
| `CloseContainer` | Close container operations |
### Storage Container Manager Metrics
The metrics for containers managed by the Storage Container Manager.
Storage Container Manager (SCM) is a master service which keeps track of
replicas of storage containers. It also manages all data nodes and their
states, dealing with container reports and dispatching commands for execution.
Following are the counters for containers:
| Name | Description |
|:---- |:---- |
| `LastContainerReportSize` | Total size in bytes of all containers |
| `LastContainerReportUsed` | Total number of bytes used by all containers |
| `LastContainerReportKeyCount` | Total number of keys in all containers |
| `LastContainerReportReadBytes` | Total number of bytes read from all containers |
| `LastContainerReportWriteBytes` | Total number of bytes written into all containers |
| `LastContainerReportReadCount` | Total number of times containers have been read from |
| `LastContainerReportWriteCount` | Total number of times containers have been written to |
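
These are plain metrics2 gauges, so besides the configured metrics sinks they can also be read back over JMX while SCM is running. A hedged sketch, assuming the usual metrics2 MBean naming (Hadoop:service=&lt;prefix&gt;,name=SCMMetrics) and that the code runs in, or is connected to, the SCM JVM; the wildcard avoids guessing the exact service prefix:

import java.lang.management.ManagementFactory;
import java.util.Set;
import javax.management.MBeanServer;
import javax.management.ObjectName;

/**
 * Sketch only: reads the SCM container-report gauges over JMX.
 * Assumes the default metrics2 MBean naming; adapt the ObjectName
 * pattern or use a remote JMX connector for a real deployment.
 */
public class ScmMetricsJmxSketch {
  public static void main(String[] args) throws Exception {
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();

    // Match the SCMMetrics source regardless of the metrics2 service prefix.
    Set<ObjectName> names =
        mbs.queryNames(new ObjectName("Hadoop:service=*,name=SCMMetrics"), null);

    for (ObjectName name : names) {
      // Attribute names match the gauge names listed in the table above.
      Object size = mbs.getAttribute(name, "LastContainerReportSize");
      Object keys = mbs.getAttribute(name, "LastContainerReportKeyCount");
      System.out.println(name + " size=" + size + " keys=" + keys);
    }
  }
}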
### Key Space Metrics
The metrics for various key space manager operations in HDFS Ozone.

View File

@ -0,0 +1,116 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.ozone.scm;
import static org.apache.hadoop.test.MetricsAsserts.getLongGauge;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
import static org.junit.Assert.assertEquals;
import java.util.UUID;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.hadoop.conf.OzoneConfiguration;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.ozone.container.common.SCMTestUtils;
import org.apache.hadoop.ozone.container.common.helpers.ContainerReport;
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos;
import org.apache.hadoop.ozone.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto;
import org.apache.hadoop.ozone.scm.container.placement.metrics.ContainerStat;
import org.apache.hadoop.ozone.scm.container.placement.metrics.SCMMetrics;
import org.junit.Test;
/**
* This class tests the metrics of Storage Container Manager.
*/
public class TestSCMMetrics {
private static MiniOzoneCluster cluster = null;
@Test
public void testContainerMetrics() throws Exception {
int nodeCount = 2;
int numReport = 2;
long size = OzoneConsts.GB * 5;
long used = OzoneConsts.GB * 2;
long readBytes = OzoneConsts.GB * 1;
long writeBytes = OzoneConsts.GB * 2;
int keyCount = 1000;
int readCount = 100;
int writeCount = 50;
OzoneConfiguration conf = new OzoneConfiguration();
try {
cluster = new MiniOzoneCluster.Builder(conf)
.setHandlerType(OzoneConsts.OZONE_HANDLER_DISTRIBUTED)
.numDataNodes(nodeCount).build();
ContainerStat stat = new ContainerStat(size, used, keyCount, readBytes,
writeBytes, readCount, writeCount);
StorageContainerManager scmManager = cluster.getStorageContainerManager();
scmManager.sendContainerReport(createContainerReport(numReport, stat));
// verify container stat metrics
MetricsRecordBuilder scmMetrics = getMetrics(SCMMetrics.SOURCE_NAME);
assertEquals(size * numReport,
getLongGauge("LastContainerReportSize", scmMetrics));
assertEquals(used * numReport,
getLongGauge("LastContainerReportUsed", scmMetrics));
assertEquals(readBytes * numReport,
getLongGauge("LastContainerReportReadBytes", scmMetrics));
assertEquals(writeBytes * numReport,
getLongGauge("LastContainerReportWriteBytes", scmMetrics));
assertEquals(keyCount * numReport,
getLongGauge("LastContainerReportKeyCount", scmMetrics));
assertEquals(readCount * numReport,
getLongGauge("LastContainerReportReadCount", scmMetrics));
assertEquals(writeCount * numReport,
getLongGauge("LastContainerReportWriteCount", scmMetrics));
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
private ContainerReportsRequestProto createContainerReport(int numReport,
ContainerStat stat) {
StorageContainerDatanodeProtocolProtos.ContainerReportsRequestProto.Builder
reportsBuilder = StorageContainerDatanodeProtocolProtos
.ContainerReportsRequestProto.newBuilder();
for (int i = 0; i < numReport; i++) {
ContainerReport report = new ContainerReport(
UUID.randomUUID().toString(), DigestUtils.sha256Hex("Simulated"));
report.setSize(stat.getSize().get());
report.setBytesUsed(stat.getUsed().get());
report.setReadCount(stat.getReadCount().get());
report.setReadBytes(stat.getReadBytes().get());
report.setKeyCount(stat.getKeyCount().get());
report.setWriteCount(stat.getWriteCount().get());
report.setWriteBytes(stat.getWriteBytes().get());
reportsBuilder.addReports(report.getProtoBufMessage());
}
reportsBuilder.setDatanodeID(SCMTestUtils.getDatanodeID()
.getProtoBufMessage());
reportsBuilder.setType(StorageContainerDatanodeProtocolProtos
.ContainerReportsRequestProto.reportType.fullReport);
return reportsBuilder.build();
}
}