HDFS-15781. Add metrics for how blocks are moved in replaceBlock. (#2704)

This commit is contained in:
LeonGao 2021-02-23 22:29:48 -08:00 committed by GitHub
parent e9a3c2950e
commit 940c780feb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 91 additions and 12 deletions

View File

@ -1250,6 +1250,8 @@ public void replaceBlock(final ExtendedBlock block,
LOG.info("Moved {} from {}, delHint={}", LOG.info("Moved {} from {}, delHint={}",
block, peer.getRemoteAddressString(), delHint); block, peer.getRemoteAddressString(), delHint);
datanode.metrics.incrReplaceBlockOpToOtherHost();
} }
} catch (IOException ioe) { } catch (IOException ioe) {
opStatus = ERROR; opStatus = ERROR;

View File

@ -1121,6 +1121,10 @@ public ReplicaInfo moveBlockAcrossStorage(ExtendedBlock block,
} }
try { try {
moveBlock(block, replicaInfo, volumeRef, useVolumeOnSameMount); moveBlock(block, replicaInfo, volumeRef, useVolumeOnSameMount);
datanode.getMetrics().incrReplaceBlockOpOnSameHost();
if (useVolumeOnSameMount) {
datanode.getMetrics().incrReplaceBlockOpOnSameMount();
}
} finally { } finally {
if (volumeRef != null) { if (volumeRef != null) {
volumeRef.close(); volumeRef.close();

View File

@ -188,6 +188,15 @@ public class DataNodeMetrics {
@Metric MutableCounterLong packetsSlowWriteToDisk; @Metric MutableCounterLong packetsSlowWriteToDisk;
@Metric MutableCounterLong packetsSlowWriteToOsCache; @Metric MutableCounterLong packetsSlowWriteToOsCache;
@Metric("Number of replaceBlock ops between" +
" storage types on same host with local copy")
private MutableCounterLong replaceBlockOpOnSameHost;
@Metric("Number of replaceBlock ops between" +
" storage types on same disk mount with same disk tiering feature")
private MutableCounterLong replaceBlockOpOnSameMount;
@Metric("Number of replaceBlock ops to another node")
private MutableCounterLong replaceBlockOpToOtherHost;
final MetricsRegistry registry = new MetricsRegistry("datanode"); final MetricsRegistry registry = new MetricsRegistry("datanode");
@Metric("Milliseconds spent on calling NN rpc") @Metric("Milliseconds spent on calling NN rpc")
private MutableRatesWithAggregation private MutableRatesWithAggregation
@ -711,4 +720,17 @@ public void incrPacketsSlowWriteToDisk() {
public void incrPacketsSlowWriteToOsCache() { public void incrPacketsSlowWriteToOsCache() {
packetsSlowWriteToOsCache.incr(); packetsSlowWriteToOsCache.incr();
} }
public void incrReplaceBlockOpOnSameMount() {
replaceBlockOpOnSameMount.incr();
}
public void incrReplaceBlockOpOnSameHost() {
replaceBlockOpOnSameHost.incr();
}
public void incrReplaceBlockOpToOtherHost() {
replaceBlockOpToOtherHost.incr();
}
} }

View File

@ -34,6 +34,8 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SERVER_HTTPS_KEYSTORE_RESOURCE_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SERVER_HTTPS_KEYSTORE_RESOURCE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL_KEY;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_DATA_TRANSFER_PROTECTION_KEY; import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_DATA_TRANSFER_PROTECTION_KEY;
import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
@ -83,6 +85,7 @@
import org.apache.hadoop.hdfs.server.mover.Mover.MLocation; import org.apache.hadoop.hdfs.server.mover.Mover.MLocation;
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.http.HttpConfig;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.minikdc.MiniKdc; import org.apache.hadoop.minikdc.MiniKdc;
import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
@ -170,14 +173,26 @@ public void testScheduleSameBlock() throws IOException {
} }
} }
private void testWithinSameNode(Configuration conf) throws Exception { private void testMovementWithLocalityOption(Configuration conf,
final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) boolean sameNode) throws Exception {
final MiniDFSCluster cluster;
if (sameNode) {
cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(3) .numDataNodes(3)
.storageTypes( .storageTypes(
new StorageType[] {StorageType.DISK, StorageType.ARCHIVE}) new StorageType[] {StorageType.DISK, StorageType.ARCHIVE})
.build(); .build();
} else {
cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(2)
.storageTypes(
new StorageType[][] {{StorageType.DISK}, {StorageType.ARCHIVE}})
.build();
}
try { try {
cluster.waitActive(); cluster.waitActive();
final DistributedFileSystem dfs = cluster.getFileSystem(); final DistributedFileSystem dfs = cluster.getFileSystem();
final String file = "/testScheduleWithinSameNode/file"; final String file = "/testScheduleWithinSameNode/file";
Path dir = new Path("/testScheduleWithinSameNode"); Path dir = new Path("/testScheduleWithinSameNode");
@ -201,12 +216,37 @@ private void testWithinSameNode(Configuration conf) throws Exception {
Assert.assertEquals("Movement to ARCHIVE should be successful", 0, rc); Assert.assertEquals("Movement to ARCHIVE should be successful", 0, rc);
// Wait till namenode notified about the block location details // Wait till namenode notified about the block location details
waitForLocatedBlockWithArchiveStorageType(dfs, file, 3); waitForLocatedBlockWithArchiveStorageType(dfs, file, sameNode ? 3 : 1);
MetricsRecordBuilder rb =
getMetrics(cluster.getDataNodes().get(1).getMetrics().name());
if (!sameNode) {
testReplaceBlockOpLocalityMetrics(0, 0, 1, rb);
} else if (conf.getBoolean(
DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING, false)) {
testReplaceBlockOpLocalityMetrics(1, 1, 0, rb);
} else {
testReplaceBlockOpLocalityMetrics(1, 0, 0, rb);
}
} finally { } finally {
cluster.shutdown(); cluster.shutdown();
} }
} }
private void testReplaceBlockOpLocalityMetrics(
long sameHost,
long sameMount,
long otherHost,
MetricsRecordBuilder rb) {
assertCounter("ReplaceBlockOpOnSameHost",
sameHost, rb);
assertCounter("ReplaceBlockOpOnSameMount",
sameMount, rb);
assertCounter("ReplaceBlockOpToOtherHost",
otherHost, rb);
}
private void setupStoragePoliciesAndPaths(DistributedFileSystem dfs1, private void setupStoragePoliciesAndPaths(DistributedFileSystem dfs1,
DistributedFileSystem dfs2, DistributedFileSystem dfs2,
Path dir, String file) Path dir, String file)
@ -441,17 +481,27 @@ public Boolean get() {
}, 100, 3000); }, 100, 3000);
} }
/**
* Test block movement with different block locality scenarios.
* 1) Block will be copied to local host,
* if there is target storage type on same datanode.
* 2) Block will be moved within local mount with hardlink,
* if disk/archive are on same mount with same-disk-tiering feature on.
* 3) Block will be moved to another datanode,
* if there is no available target storage type on local datanode.
*/
@Test @Test
public void testScheduleBlockWithinSameNode() throws Exception { public void testScheduleBlockLocality() throws Exception {
final Configuration conf = new HdfsConfiguration(); final Configuration conf = new HdfsConfiguration();
initConf(conf); initConf(conf);
testWithinSameNode(conf); testMovementWithLocalityOption(conf, true);
// Test movement with hardlink, when same disk tiering is enabled. // Test movement with hardlink, when same disk tiering is enabled.
conf.setBoolean(DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING, true); conf.setBoolean(DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING, true);
conf.setDouble(DFSConfigKeys conf.setDouble(DFSConfigKeys
.DFS_DATANODE_RESERVE_FOR_ARCHIVE_DEFAULT_PERCENTAGE, 0.5); .DFS_DATANODE_RESERVE_FOR_ARCHIVE_DEFAULT_PERCENTAGE, 0.5);
testWithinSameNode(conf); testMovementWithLocalityOption(conf, true);
conf.setBoolean(DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING, false); conf.setBoolean(DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING, false);
testMovementWithLocalityOption(conf, false);
} }
private void checkMovePaths(List<Path> actual, Path... expected) { private void checkMovePaths(List<Path> actual, Path... expected) {
@ -1006,7 +1056,8 @@ private void initSecureConf(Configuration conf) throws Exception {
/** /**
* Test Mover runs fine when logging in with a keytab in kerberized env. * Test Mover runs fine when logging in with a keytab in kerberized env.
* Reusing testWithinSameNode here for basic functionality testing. * Reusing testMovementWithLocalityOption
* here for basic functionality testing.
*/ */
@Test(timeout = 300000) @Test(timeout = 300000)
public void testMoverWithKeytabs() throws Exception { public void testMoverWithKeytabs() throws Exception {
@ -1020,7 +1071,7 @@ public void testMoverWithKeytabs() throws Exception {
@Override @Override
public Void run() throws Exception { public Void run() throws Exception {
// verify that mover runs Ok. // verify that mover runs Ok.
testWithinSameNode(conf); testMovementWithLocalityOption(conf, true);
// verify that UGI was logged in using keytab. // verify that UGI was logged in using keytab.
Assert.assertTrue(UserGroupInformation.isLoginKeytabBased()); Assert.assertTrue(UserGroupInformation.isLoginKeytabBased());
return null; return null;