HDFS-5350. Name Node should report fsimage transfer time as a metric. Contributed by Jimmy Xiang.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551415 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Wang 2013-12-17 00:30:06 +00:00
parent 49ad07af97
commit 0fc2929d13
4 changed files with 57 additions and 0 deletions

View File

@ -741,6 +741,9 @@ Release 2.4.0 - UNRELEASED
HDFS-5652. Refactor invalid block token exception handling in DFSInputStream.
(Liang Xie via junping_du)
HDFS-5350. Name Node should report fsimage transfer time as a metric.
(Jimmy Xiang via wang)
OPTIMIZATIONS
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)

View File

@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
import static org.apache.hadoop.util.Time.now;
import java.security.PrivilegedExceptionAction;
import java.util.*;
import java.io.*;
@ -41,6 +43,7 @@
import org.apache.hadoop.hdfs.server.common.JspHelper;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.hdfs.util.MD5FileUtils;
@ -88,6 +91,7 @@ public void doGet(final HttpServletRequest request,
final GetImageParams parsedParams = new GetImageParams(request, response);
final Configuration conf = (Configuration) context
.getAttribute(JspHelper.CURRENT_CONF);
final NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
if (UserGroupInformation.isSecurityEnabled() &&
!isValidRequestor(context, request.getUserPrincipal().getName(), conf)) {
@ -128,14 +132,26 @@ public Void run() throws Exception {
throw new IOException(errorMessage);
}
CheckpointFaultInjector.getInstance().beforeGetImageSetsHeaders();
long start = now();
serveFile(imageFile);
if (metrics != null) { // Metrics non-null only when used inside name node
long elapsed = now() - start;
metrics.addGetImage(elapsed);
}
} else if (parsedParams.isGetEdit()) {
long startTxId = parsedParams.getStartTxId();
long endTxId = parsedParams.getEndTxId();
File editFile = nnImage.getStorage()
.findFinalizedEditsFile(startTxId, endTxId);
long start = now();
serveFile(editFile);
if (metrics != null) { // Metrics non-null only when used inside name node
long elapsed = now() - start;
metrics.addGetEdit(elapsed);
}
} else if (parsedParams.isPutImage()) {
final long txid = parsedParams.getTxId();
@ -159,6 +175,7 @@ public Void run() throws Exception {
UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab();
}
long start = now();
// issue a HTTP get request to download the new fsimage
MD5Hash downloadImageDigest =
TransferFsImage.downloadImageToStorage(
@ -166,6 +183,11 @@ public Void run() throws Exception {
nnImage.getStorage(), true);
nnImage.saveDigestAndRenameCheckpointImage(txid, downloadImageDigest);
if (metrics != null) { // Metrics non-null only when used inside name node
long elapsed = now() - start;
metrics.addPutImage(elapsed);
}
// Now that we have a new checkpoint, we might be able to
// remove some old ones.
nnImage.purgeOldStorage();

View File

@ -89,6 +89,13 @@ public class NameNodeMetrics {
@Metric("Time loading FS Image at startup in msec")
MutableGaugeInt fsImageLoadTime;
@Metric("GetImageServlet getEdit")
MutableRate getEdit;
@Metric("GetImageServlet getImage")
MutableRate getImage;
@Metric("GetImageServlet putImage")
MutableRate putImage;
NameNodeMetrics(String processName, String sessionId, int[] intervals) {
registry.tag(ProcessName, processName).tag(SessionId, sessionId);
@ -251,4 +258,16 @@ public void addCacheBlockReport(long latency) {
public void setSafeModeTime(long elapsed) {
safeModeTime.set((int) elapsed);
}
public void addGetEdit(long latency) {
getEdit.add(latency);
}
public void addGetImage(long latency) {
getImage.add(latency);
}
public void addPutImage(long latency) {
putImage.add(latency);
}
}

View File

@ -20,6 +20,9 @@
import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.assertNNHasCheckpoints;
import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.getNameNodeCurrentDirs;
import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt;
import static org.apache.hadoop.test.MetricsAsserts.assertGaugeGt;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
@ -72,6 +75,7 @@
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
import org.apache.hadoop.hdfs.tools.DFSAdmin;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
@ -106,6 +110,7 @@ public class TestCheckpoint {
}
static final Log LOG = LogFactory.getLog(TestCheckpoint.class);
static final String NN_METRICS = "NameNodeActivity";
static final long seed = 0xDEADBEEFL;
static final int blockSize = 4096;
@ -1048,6 +1053,14 @@ public void testCheckpoint() throws IOException {
//
secondary = startSecondaryNameNode(conf);
secondary.doCheckpoint();
MetricsRecordBuilder rb = getMetrics(NN_METRICS);
assertCounterGt("GetImageNumOps", 0, rb);
assertCounterGt("GetEditNumOps", 0, rb);
assertCounterGt("PutImageNumOps", 0, rb);
assertGaugeGt("GetImageAvgTime", 0.0, rb);
assertGaugeGt("GetEditAvgTime", 0.0, rb);
assertGaugeGt("PutImageAvgTime", 0.0, rb);
} finally {
fileSys.close();
cleanup(secondary);