HDFS-12409. Add metrics of execution time of different stages in EC recovery task. (Lei (Eddy) Xu)
This commit is contained in:
parent
c3f35c422b
commit
73aed34dff
@ -22,6 +22,7 @@
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics;
|
||||
import org.apache.hadoop.util.Time;
|
||||
|
||||
/**
|
||||
* StripedBlockReconstructor reconstructs one or more missing striped blocks in
|
||||
@ -83,18 +84,28 @@ void reconstruct() throws IOException {
|
||||
final int toReconstructLen =
|
||||
(int) Math.min(getStripedReader().getBufferSize(), remaining);
|
||||
|
||||
long start = Time.monotonicNow();
|
||||
// step1: read from minimum source DNs required for reconstruction.
|
||||
// The returned success list is the source DNs we do real read from
|
||||
getStripedReader().readMinimumSources(toReconstructLen);
|
||||
long readEnd = Time.monotonicNow();
|
||||
|
||||
// step2: decode to reconstruct targets
|
||||
reconstructTargets(toReconstructLen);
|
||||
long decodeEnd = Time.monotonicNow();
|
||||
|
||||
// step3: transfer data
|
||||
if (stripedWriter.transferData2Targets() == 0) {
|
||||
String error = "Transfer failed for all targets.";
|
||||
throw new IOException(error);
|
||||
}
|
||||
long writeEnd = Time.monotonicNow();
|
||||
|
||||
// Only successful reconstructions are recorded.
|
||||
final DataNodeMetrics metrics = getDatanode().getMetrics();
|
||||
metrics.incrECReconstructionReadTime(readEnd - start);
|
||||
metrics.incrECReconstructionDecodingTime(decodeEnd - readEnd);
|
||||
metrics.incrECReconstructionWriteTime(writeEnd - decodeEnd);
|
||||
|
||||
updatePositionInBlock(toReconstructLen);
|
||||
|
||||
|
@ -151,6 +151,12 @@ public class DataNodeMetrics {
|
||||
MutableCounterLong ecReconstructionBytesWritten;
|
||||
@Metric("Bytes remote read by erasure coding worker")
|
||||
MutableCounterLong ecReconstructionRemoteBytesRead;
|
||||
@Metric("Milliseconds spent on read by erasure coding worker")
|
||||
private MutableCounterLong ecReconstructionReadTimeMillis;
|
||||
@Metric("Milliseconds spent on decoding by erasure coding worker")
|
||||
private MutableCounterLong ecReconstructionDecodingTimeMillis;
|
||||
@Metric("Milliseconds spent on write by erasure coding worker")
|
||||
private MutableCounterLong ecReconstructionWriteTimeMillis;
|
||||
|
||||
final MetricsRegistry registry = new MetricsRegistry("datanode");
|
||||
final String name;
|
||||
@ -503,4 +509,16 @@ public void incrECReconstructionRemoteBytesRead(long bytes) {
|
||||
/**
 * Increments the {@code ecReconstructionBytesWritten} counter by the given
 * amount.
 *
 * @param bytes amount to add to the counter (presumably a byte count, going
 *              by the parameter and counter names)
 */
public void incrECReconstructionBytesWritten(long bytes) {
|
||||
ecReconstructionBytesWritten.incr(bytes);
|
||||
}
|
||||
|
||||
/**
 * Increments the {@code ecReconstructionReadTimeMillis} counter, whose
 * metric description is "Milliseconds spent on read by erasure coding
 * worker", by the given amount.
 *
 * @param millis amount to add to the counter, in milliseconds
 */
public void incrECReconstructionReadTime(long millis) {
|
||||
ecReconstructionReadTimeMillis.incr(millis);
|
||||
}
|
||||
|
||||
/**
 * Increments the {@code ecReconstructionWriteTimeMillis} counter, whose
 * metric description is "Milliseconds spent on write by erasure coding
 * worker", by the given amount.
 *
 * @param millis amount to add to the counter, in milliseconds
 */
public void incrECReconstructionWriteTime(long millis) {
|
||||
ecReconstructionWriteTimeMillis.incr(millis);
|
||||
}
|
||||
|
||||
/**
 * Increments the {@code ecReconstructionDecodingTimeMillis} counter, whose
 * metric description is "Milliseconds spent on decoding by erasure coding
 * worker", by the given amount.
 *
 * @param millis amount to add to the counter, in milliseconds
 */
public void incrECReconstructionDecodingTime(long millis) {
|
||||
ecReconstructionDecodingTimeMillis.incr(millis);
|
||||
}
|
||||
}
|
||||
|
@ -90,6 +90,10 @@ public void tearDown() {
|
||||
|
||||
@Test(timeout = 120000)
|
||||
public void testFullBlock() throws Exception {
|
||||
Assert.assertEquals(0, getLongMetric("EcReconstructionReadTimeMillis"));
|
||||
Assert.assertEquals(0, getLongMetric("EcReconstructionDecodingTimeMillis"));
|
||||
Assert.assertEquals(0, getLongMetric("EcReconstructionWriteTimeMillis"));
|
||||
|
||||
doTest("/testEcMetrics", blockGroupSize, 0);
|
||||
|
||||
Assert.assertEquals("EcReconstructionTasks should be ",
|
||||
@ -103,6 +107,9 @@ public void testFullBlock() throws Exception {
|
||||
blockSize, getLongMetric("EcReconstructionBytesWritten"));
|
||||
Assert.assertEquals("EcReconstructionRemoteBytesRead should be ",
|
||||
0, getLongMetricWithoutCheck("EcReconstructionRemoteBytesRead"));
|
||||
Assert.assertTrue(getLongMetric("EcReconstructionReadTimeMillis") > 0);
|
||||
Assert.assertTrue(getLongMetric("EcReconstructionDecodingTimeMillis") > 0);
|
||||
Assert.assertTrue(getLongMetric("EcReconstructionWriteTimeMillis") > 0);
|
||||
}
|
||||
|
||||
// A partial block, reconstruct the partial block
|
||||
|
Loading…
Reference in New Issue
Block a user