HDFS-5350. Name Node should report fsimage transfer time as a metric. Contributed by Jimmy Xiang.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551415 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Wang 2013-12-17 00:30:06 +00:00
parent 49ad07af97
commit 0fc2929d13
4 changed files with 57 additions and 0 deletions

View File

@ -741,6 +741,9 @@ Release 2.4.0 - UNRELEASED
HDFS-5652. Refactor invalid block token exception handling in DFSInputStream. HDFS-5652. Refactor invalid block token exception handling in DFSInputStream.
(Liang Xie via junping_du) (Liang Xie via junping_du)
HDFS-5350. Name Node should report fsimage transfer time as a metric.
(Jimmy Xiang via wang)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn) HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)

View File

@ -17,6 +17,8 @@
*/ */
package org.apache.hadoop.hdfs.server.namenode; package org.apache.hadoop.hdfs.server.namenode;
import static org.apache.hadoop.util.Time.now;
import java.security.PrivilegedExceptionAction; import java.security.PrivilegedExceptionAction;
import java.util.*; import java.util.*;
import java.io.*; import java.io.*;
@ -41,6 +43,7 @@
import org.apache.hadoop.hdfs.server.common.JspHelper; import org.apache.hadoop.hdfs.server.common.JspHelper;
import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.hdfs.util.MD5FileUtils; import org.apache.hadoop.hdfs.util.MD5FileUtils;
@ -88,6 +91,7 @@ public void doGet(final HttpServletRequest request,
final GetImageParams parsedParams = new GetImageParams(request, response); final GetImageParams parsedParams = new GetImageParams(request, response);
final Configuration conf = (Configuration) context final Configuration conf = (Configuration) context
.getAttribute(JspHelper.CURRENT_CONF); .getAttribute(JspHelper.CURRENT_CONF);
final NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
if (UserGroupInformation.isSecurityEnabled() && if (UserGroupInformation.isSecurityEnabled() &&
!isValidRequestor(context, request.getUserPrincipal().getName(), conf)) { !isValidRequestor(context, request.getUserPrincipal().getName(), conf)) {
@ -128,14 +132,26 @@ public Void run() throws Exception {
throw new IOException(errorMessage); throw new IOException(errorMessage);
} }
CheckpointFaultInjector.getInstance().beforeGetImageSetsHeaders(); CheckpointFaultInjector.getInstance().beforeGetImageSetsHeaders();
long start = now();
serveFile(imageFile); serveFile(imageFile);
if (metrics != null) { // Metrics non-null only when used inside name node
long elapsed = now() - start;
metrics.addGetImage(elapsed);
}
} else if (parsedParams.isGetEdit()) { } else if (parsedParams.isGetEdit()) {
long startTxId = parsedParams.getStartTxId(); long startTxId = parsedParams.getStartTxId();
long endTxId = parsedParams.getEndTxId(); long endTxId = parsedParams.getEndTxId();
File editFile = nnImage.getStorage() File editFile = nnImage.getStorage()
.findFinalizedEditsFile(startTxId, endTxId); .findFinalizedEditsFile(startTxId, endTxId);
long start = now();
serveFile(editFile); serveFile(editFile);
if (metrics != null) { // Metrics non-null only when used inside name node
long elapsed = now() - start;
metrics.addGetEdit(elapsed);
}
} else if (parsedParams.isPutImage()) { } else if (parsedParams.isPutImage()) {
final long txid = parsedParams.getTxId(); final long txid = parsedParams.getTxId();
@ -159,6 +175,7 @@ public Void run() throws Exception {
UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab(); UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab();
} }
long start = now();
// issue a HTTP get request to download the new fsimage // issue a HTTP get request to download the new fsimage
MD5Hash downloadImageDigest = MD5Hash downloadImageDigest =
TransferFsImage.downloadImageToStorage( TransferFsImage.downloadImageToStorage(
@ -166,6 +183,11 @@ public Void run() throws Exception {
nnImage.getStorage(), true); nnImage.getStorage(), true);
nnImage.saveDigestAndRenameCheckpointImage(txid, downloadImageDigest); nnImage.saveDigestAndRenameCheckpointImage(txid, downloadImageDigest);
if (metrics != null) { // Metrics non-null only when used inside name node
long elapsed = now() - start;
metrics.addPutImage(elapsed);
}
// Now that we have a new checkpoint, we might be able to // Now that we have a new checkpoint, we might be able to
// remove some old ones. // remove some old ones.
nnImage.purgeOldStorage(); nnImage.purgeOldStorage();

View File

@ -89,6 +89,13 @@ public class NameNodeMetrics {
@Metric("Time loading FS Image at startup in msec") @Metric("Time loading FS Image at startup in msec")
MutableGaugeInt fsImageLoadTime; MutableGaugeInt fsImageLoadTime;
@Metric("GetImageServlet getEdit")
MutableRate getEdit;
@Metric("GetImageServlet getImage")
MutableRate getImage;
@Metric("GetImageServlet putImage")
MutableRate putImage;
NameNodeMetrics(String processName, String sessionId, int[] intervals) { NameNodeMetrics(String processName, String sessionId, int[] intervals) {
registry.tag(ProcessName, processName).tag(SessionId, sessionId); registry.tag(ProcessName, processName).tag(SessionId, sessionId);
@ -251,4 +258,16 @@ public void addCacheBlockReport(long latency) {
public void setSafeModeTime(long elapsed) { public void setSafeModeTime(long elapsed) {
safeModeTime.set((int) elapsed); safeModeTime.set((int) elapsed);
} }
public void addGetEdit(long latency) {
getEdit.add(latency);
}
public void addGetImage(long latency) {
getImage.add(latency);
}
public void addPutImage(long latency) {
putImage.add(latency);
}
} }

View File

@ -20,6 +20,9 @@
import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI; import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.assertNNHasCheckpoints; import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.assertNNHasCheckpoints;
import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.getNameNodeCurrentDirs; import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.getNameNodeCurrentDirs;
import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt;
import static org.apache.hadoop.test.MetricsAsserts.assertGaugeGt;
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
@ -72,6 +75,7 @@
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
import org.apache.hadoop.hdfs.tools.DFSAdmin; import org.apache.hadoop.hdfs.tools.DFSAdmin;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
import org.apache.hadoop.test.GenericTestUtils.LogCapturer; import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
@ -106,6 +110,7 @@ public class TestCheckpoint {
} }
static final Log LOG = LogFactory.getLog(TestCheckpoint.class); static final Log LOG = LogFactory.getLog(TestCheckpoint.class);
static final String NN_METRICS = "NameNodeActivity";
static final long seed = 0xDEADBEEFL; static final long seed = 0xDEADBEEFL;
static final int blockSize = 4096; static final int blockSize = 4096;
@ -1048,6 +1053,14 @@ public void testCheckpoint() throws IOException {
// //
secondary = startSecondaryNameNode(conf); secondary = startSecondaryNameNode(conf);
secondary.doCheckpoint(); secondary.doCheckpoint();
MetricsRecordBuilder rb = getMetrics(NN_METRICS);
assertCounterGt("GetImageNumOps", 0, rb);
assertCounterGt("GetEditNumOps", 0, rb);
assertCounterGt("PutImageNumOps", 0, rb);
assertGaugeGt("GetImageAvgTime", 0.0, rb);
assertGaugeGt("GetEditAvgTime", 0.0, rb);
assertGaugeGt("PutImageAvgTime", 0.0, rb);
} finally { } finally {
fileSys.close(); fileSys.close();
cleanup(secondary); cleanup(secondary);