HDFS-5350. Name Node should report fsimage transfer time as a metric. Contributed by Jimmy Xiang.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551415 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
49ad07af97
commit
0fc2929d13
@ -741,6 +741,9 @@ Release 2.4.0 - UNRELEASED
|
||||
HDFS-5652. Refactor invalid block token exception handling in DFSInputStream.
|
||||
(Liang Xie via junping_du)
|
||||
|
||||
HDFS-5350. Name Node should report fsimage transfer time as a metric.
|
||||
(Jimmy Xiang via wang)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)
|
||||
|
@ -17,6 +17,8 @@
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.server.namenode;
|
||||
|
||||
import static org.apache.hadoop.util.Time.now;
|
||||
|
||||
import java.security.PrivilegedExceptionAction;
|
||||
import java.util.*;
|
||||
import java.io.*;
|
||||
@ -41,6 +43,7 @@
|
||||
import org.apache.hadoop.hdfs.server.common.JspHelper;
|
||||
import org.apache.hadoop.hdfs.server.common.Storage;
|
||||
import org.apache.hadoop.hdfs.server.common.StorageInfo;
|
||||
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
|
||||
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
|
||||
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
|
||||
import org.apache.hadoop.hdfs.util.MD5FileUtils;
|
||||
@ -88,6 +91,7 @@ public void doGet(final HttpServletRequest request,
|
||||
final GetImageParams parsedParams = new GetImageParams(request, response);
|
||||
final Configuration conf = (Configuration) context
|
||||
.getAttribute(JspHelper.CURRENT_CONF);
|
||||
final NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
|
||||
|
||||
if (UserGroupInformation.isSecurityEnabled() &&
|
||||
!isValidRequestor(context, request.getUserPrincipal().getName(), conf)) {
|
||||
@ -128,14 +132,26 @@ public Void run() throws Exception {
|
||||
throw new IOException(errorMessage);
|
||||
}
|
||||
CheckpointFaultInjector.getInstance().beforeGetImageSetsHeaders();
|
||||
long start = now();
|
||||
serveFile(imageFile);
|
||||
|
||||
if (metrics != null) { // Metrics non-null only when used inside name node
|
||||
long elapsed = now() - start;
|
||||
metrics.addGetImage(elapsed);
|
||||
}
|
||||
} else if (parsedParams.isGetEdit()) {
|
||||
long startTxId = parsedParams.getStartTxId();
|
||||
long endTxId = parsedParams.getEndTxId();
|
||||
|
||||
File editFile = nnImage.getStorage()
|
||||
.findFinalizedEditsFile(startTxId, endTxId);
|
||||
long start = now();
|
||||
serveFile(editFile);
|
||||
|
||||
if (metrics != null) { // Metrics non-null only when used inside name node
|
||||
long elapsed = now() - start;
|
||||
metrics.addGetEdit(elapsed);
|
||||
}
|
||||
} else if (parsedParams.isPutImage()) {
|
||||
final long txid = parsedParams.getTxId();
|
||||
|
||||
@ -159,12 +175,18 @@ public Void run() throws Exception {
|
||||
UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab();
|
||||
}
|
||||
|
||||
long start = now();
|
||||
// issue a HTTP get request to download the new fsimage
|
||||
MD5Hash downloadImageDigest =
|
||||
TransferFsImage.downloadImageToStorage(
|
||||
parsedParams.getInfoServer(conf), txid,
|
||||
nnImage.getStorage(), true);
|
||||
nnImage.saveDigestAndRenameCheckpointImage(txid, downloadImageDigest);
|
||||
|
||||
if (metrics != null) { // Metrics non-null only when used inside name node
|
||||
long elapsed = now() - start;
|
||||
metrics.addPutImage(elapsed);
|
||||
}
|
||||
|
||||
// Now that we have a new checkpoint, we might be able to
|
||||
// remove some old ones.
|
||||
|
@ -89,6 +89,13 @@ public class NameNodeMetrics {
|
||||
@Metric("Time loading FS Image at startup in msec")
|
||||
MutableGaugeInt fsImageLoadTime;
|
||||
|
||||
@Metric("GetImageServlet getEdit")
|
||||
MutableRate getEdit;
|
||||
@Metric("GetImageServlet getImage")
|
||||
MutableRate getImage;
|
||||
@Metric("GetImageServlet putImage")
|
||||
MutableRate putImage;
|
||||
|
||||
NameNodeMetrics(String processName, String sessionId, int[] intervals) {
|
||||
registry.tag(ProcessName, processName).tag(SessionId, sessionId);
|
||||
|
||||
@ -251,4 +258,16 @@ public void addCacheBlockReport(long latency) {
|
||||
/**
 * Stores the given elapsed time in the {@code safeModeTime} metric.
 *
 * The value is narrowed from {@code long} to {@code int} before being set,
 * so an input outside the {@code int} range will not be stored unchanged.
 *
 * @param elapsed elapsed time to record (unit not shown here; presumably
 *                milliseconds - TODO confirm against the caller)
 */
public void setSafeModeTime(long elapsed) {
|
||||
safeModeTime.set((int) elapsed);
|
||||
}
|
||||
|
||||
/**
 * Adds one sample to the {@code getEdit} rate metric
 * ("GetImageServlet getEdit").
 *
 * In the servlet code shown in this change, the sample is the time spent
 * in {@code serveFile(editFile)}, computed as {@code now() - start}.
 *
 * @param latency elapsed time of one edit-file transfer (presumably
 *                milliseconds - TODO confirm the unit of {@code Time.now()})
 */
public void addGetEdit(long latency) {
|
||||
getEdit.add(latency);
|
||||
}
|
||||
|
||||
/**
 * Adds one sample to the {@code getImage} rate metric
 * ("GetImageServlet getImage").
 *
 * In the servlet code shown in this change, the sample is the time spent
 * in {@code serveFile(imageFile)}, computed as {@code now() - start}.
 *
 * @param latency elapsed time of one image-file transfer (presumably
 *                milliseconds - TODO confirm the unit of {@code Time.now()})
 */
public void addGetImage(long latency) {
|
||||
getImage.add(latency);
|
||||
}
|
||||
|
||||
/**
 * Adds one sample to the {@code putImage} rate metric
 * ("GetImageServlet putImage").
 *
 * In the servlet code shown in this change, the sample covers both
 * {@code TransferFsImage.downloadImageToStorage(...)} and the following
 * {@code saveDigestAndRenameCheckpointImage(...)} call, so it is more than
 * the raw download time.
 *
 * @param latency elapsed time of one put-image operation (presumably
 *                milliseconds - TODO confirm the unit of {@code Time.now()})
 */
public void addPutImage(long latency) {
|
||||
putImage.add(latency);
|
||||
}
|
||||
}
|
||||
|
@ -20,6 +20,9 @@
|
||||
import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
|
||||
import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.assertNNHasCheckpoints;
|
||||
import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.getNameNodeCurrentDirs;
|
||||
import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt;
|
||||
import static org.apache.hadoop.test.MetricsAsserts.assertGaugeGt;
|
||||
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
@ -72,6 +75,7 @@
|
||||
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
|
||||
import org.apache.hadoop.hdfs.tools.DFSAdmin;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
|
||||
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
|
||||
@ -106,6 +110,7 @@ public class TestCheckpoint {
|
||||
}
|
||||
|
||||
static final Log LOG = LogFactory.getLog(TestCheckpoint.class);
|
||||
static final String NN_METRICS = "NameNodeActivity";
|
||||
|
||||
static final long seed = 0xDEADBEEFL;
|
||||
static final int blockSize = 4096;
|
||||
@ -1048,6 +1053,14 @@ public void testCheckpoint() throws IOException {
|
||||
//
|
||||
secondary = startSecondaryNameNode(conf);
|
||||
secondary.doCheckpoint();
|
||||
|
||||
MetricsRecordBuilder rb = getMetrics(NN_METRICS);
|
||||
assertCounterGt("GetImageNumOps", 0, rb);
|
||||
assertCounterGt("GetEditNumOps", 0, rb);
|
||||
assertCounterGt("PutImageNumOps", 0, rb);
|
||||
assertGaugeGt("GetImageAvgTime", 0.0, rb);
|
||||
assertGaugeGt("GetEditAvgTime", 0.0, rb);
|
||||
assertGaugeGt("PutImageAvgTime", 0.0, rb);
|
||||
} finally {
|
||||
fileSys.close();
|
||||
cleanup(secondary);
|
||||
|
Loading…
Reference in New Issue
Block a user