HDFS-5350. Name Node should report fsimage transfer time as a metric. Contributed by Jimmy Xiang.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551415 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
49ad07af97
commit
0fc2929d13
@ -741,6 +741,9 @@ Release 2.4.0 - UNRELEASED
|
|||||||
HDFS-5652. Refactor invalid block token exception handling in DFSInputStream.
|
HDFS-5652. Refactor invalid block token exception handling in DFSInputStream.
|
||||||
(Liang Xie via junping_du)
|
(Liang Xie via junping_du)
|
||||||
|
|
||||||
|
HDFS-5350. Name Node should report fsimage transfer time as a metric.
|
||||||
|
(Jimmy Xiang via wang)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)
|
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)
|
||||||
|
@ -17,6 +17,8 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs.server.namenode;
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.util.Time.now;
|
||||||
|
|
||||||
import java.security.PrivilegedExceptionAction;
|
import java.security.PrivilegedExceptionAction;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
@ -41,6 +43,7 @@
|
|||||||
import org.apache.hadoop.hdfs.server.common.JspHelper;
|
import org.apache.hadoop.hdfs.server.common.JspHelper;
|
||||||
import org.apache.hadoop.hdfs.server.common.Storage;
|
import org.apache.hadoop.hdfs.server.common.Storage;
|
||||||
import org.apache.hadoop.hdfs.server.common.StorageInfo;
|
import org.apache.hadoop.hdfs.server.common.StorageInfo;
|
||||||
|
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
|
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
|
||||||
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
|
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
|
||||||
import org.apache.hadoop.hdfs.util.MD5FileUtils;
|
import org.apache.hadoop.hdfs.util.MD5FileUtils;
|
||||||
@ -88,6 +91,7 @@ public void doGet(final HttpServletRequest request,
|
|||||||
final GetImageParams parsedParams = new GetImageParams(request, response);
|
final GetImageParams parsedParams = new GetImageParams(request, response);
|
||||||
final Configuration conf = (Configuration) context
|
final Configuration conf = (Configuration) context
|
||||||
.getAttribute(JspHelper.CURRENT_CONF);
|
.getAttribute(JspHelper.CURRENT_CONF);
|
||||||
|
final NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
|
||||||
|
|
||||||
if (UserGroupInformation.isSecurityEnabled() &&
|
if (UserGroupInformation.isSecurityEnabled() &&
|
||||||
!isValidRequestor(context, request.getUserPrincipal().getName(), conf)) {
|
!isValidRequestor(context, request.getUserPrincipal().getName(), conf)) {
|
||||||
@ -128,14 +132,26 @@ public Void run() throws Exception {
|
|||||||
throw new IOException(errorMessage);
|
throw new IOException(errorMessage);
|
||||||
}
|
}
|
||||||
CheckpointFaultInjector.getInstance().beforeGetImageSetsHeaders();
|
CheckpointFaultInjector.getInstance().beforeGetImageSetsHeaders();
|
||||||
|
long start = now();
|
||||||
serveFile(imageFile);
|
serveFile(imageFile);
|
||||||
|
|
||||||
|
if (metrics != null) { // Metrics non-null only when used inside name node
|
||||||
|
long elapsed = now() - start;
|
||||||
|
metrics.addGetImage(elapsed);
|
||||||
|
}
|
||||||
} else if (parsedParams.isGetEdit()) {
|
} else if (parsedParams.isGetEdit()) {
|
||||||
long startTxId = parsedParams.getStartTxId();
|
long startTxId = parsedParams.getStartTxId();
|
||||||
long endTxId = parsedParams.getEndTxId();
|
long endTxId = parsedParams.getEndTxId();
|
||||||
|
|
||||||
File editFile = nnImage.getStorage()
|
File editFile = nnImage.getStorage()
|
||||||
.findFinalizedEditsFile(startTxId, endTxId);
|
.findFinalizedEditsFile(startTxId, endTxId);
|
||||||
|
long start = now();
|
||||||
serveFile(editFile);
|
serveFile(editFile);
|
||||||
|
|
||||||
|
if (metrics != null) { // Metrics non-null only when used inside name node
|
||||||
|
long elapsed = now() - start;
|
||||||
|
metrics.addGetEdit(elapsed);
|
||||||
|
}
|
||||||
} else if (parsedParams.isPutImage()) {
|
} else if (parsedParams.isPutImage()) {
|
||||||
final long txid = parsedParams.getTxId();
|
final long txid = parsedParams.getTxId();
|
||||||
|
|
||||||
@ -159,12 +175,18 @@ public Void run() throws Exception {
|
|||||||
UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab();
|
UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
long start = now();
|
||||||
// issue a HTTP get request to download the new fsimage
|
// issue a HTTP get request to download the new fsimage
|
||||||
MD5Hash downloadImageDigest =
|
MD5Hash downloadImageDigest =
|
||||||
TransferFsImage.downloadImageToStorage(
|
TransferFsImage.downloadImageToStorage(
|
||||||
parsedParams.getInfoServer(conf), txid,
|
parsedParams.getInfoServer(conf), txid,
|
||||||
nnImage.getStorage(), true);
|
nnImage.getStorage(), true);
|
||||||
nnImage.saveDigestAndRenameCheckpointImage(txid, downloadImageDigest);
|
nnImage.saveDigestAndRenameCheckpointImage(txid, downloadImageDigest);
|
||||||
|
|
||||||
|
if (metrics != null) { // Metrics non-null only when used inside name node
|
||||||
|
long elapsed = now() - start;
|
||||||
|
metrics.addPutImage(elapsed);
|
||||||
|
}
|
||||||
|
|
||||||
// Now that we have a new checkpoint, we might be able to
|
// Now that we have a new checkpoint, we might be able to
|
||||||
// remove some old ones.
|
// remove some old ones.
|
||||||
|
@ -89,6 +89,13 @@ public class NameNodeMetrics {
|
|||||||
@Metric("Time loading FS Image at startup in msec")
|
@Metric("Time loading FS Image at startup in msec")
|
||||||
MutableGaugeInt fsImageLoadTime;
|
MutableGaugeInt fsImageLoadTime;
|
||||||
|
|
||||||
|
@Metric("GetImageServlet getEdit")
|
||||||
|
MutableRate getEdit;
|
||||||
|
@Metric("GetImageServlet getImage")
|
||||||
|
MutableRate getImage;
|
||||||
|
@Metric("GetImageServlet putImage")
|
||||||
|
MutableRate putImage;
|
||||||
|
|
||||||
NameNodeMetrics(String processName, String sessionId, int[] intervals) {
|
NameNodeMetrics(String processName, String sessionId, int[] intervals) {
|
||||||
registry.tag(ProcessName, processName).tag(SessionId, sessionId);
|
registry.tag(ProcessName, processName).tag(SessionId, sessionId);
|
||||||
|
|
||||||
@ -251,4 +258,16 @@ public void addCacheBlockReport(long latency) {
|
|||||||
public void setSafeModeTime(long elapsed) {
|
public void setSafeModeTime(long elapsed) {
|
||||||
safeModeTime.set((int) elapsed);
|
safeModeTime.set((int) elapsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Adds one sample to the {@code getEdit} rate metric
 * (declared with the description "GetImageServlet getEdit").
 *
 * @param latency elapsed time of one edit-log transfer as measured by the
 *                caller (difference of two {@code now()} readings;
 *                presumably milliseconds -- TODO confirm)
 */
public void addGetEdit(long latency) {
|
||||||
|
getEdit.add(latency);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Adds one sample to the {@code getImage} rate metric
 * (declared with the description "GetImageServlet getImage").
 *
 * @param latency elapsed time of one fsimage transfer as measured by the
 *                caller (difference of two {@code now()} readings;
 *                presumably milliseconds -- TODO confirm)
 */
public void addGetImage(long latency) {
|
||||||
|
getImage.add(latency);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Adds one sample to the {@code putImage} rate metric
 * (declared with the description "GetImageServlet putImage").
 *
 * @param latency elapsed time of one put-image request as measured by the
 *                caller; in the servlet this spans both the image download
 *                and the digest-save/rename step (presumably
 *                milliseconds -- TODO confirm)
 */
public void addPutImage(long latency) {
|
||||||
|
putImage.add(latency);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
|
import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
|
||||||
import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.assertNNHasCheckpoints;
|
import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.assertNNHasCheckpoints;
|
||||||
import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.getNameNodeCurrentDirs;
|
import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.getNameNodeCurrentDirs;
|
||||||
|
import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt;
|
||||||
|
import static org.apache.hadoop.test.MetricsAsserts.assertGaugeGt;
|
||||||
|
import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
@ -72,6 +75,7 @@
|
|||||||
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
|
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
|
||||||
import org.apache.hadoop.hdfs.tools.DFSAdmin;
|
import org.apache.hadoop.hdfs.tools.DFSAdmin;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
|
import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
|
||||||
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
|
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
|
||||||
@ -106,6 +110,7 @@ public class TestCheckpoint {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static final Log LOG = LogFactory.getLog(TestCheckpoint.class);
|
static final Log LOG = LogFactory.getLog(TestCheckpoint.class);
|
||||||
|
static final String NN_METRICS = "NameNodeActivity";
|
||||||
|
|
||||||
static final long seed = 0xDEADBEEFL;
|
static final long seed = 0xDEADBEEFL;
|
||||||
static final int blockSize = 4096;
|
static final int blockSize = 4096;
|
||||||
@ -1048,6 +1053,14 @@ public void testCheckpoint() throws IOException {
|
|||||||
//
|
//
|
||||||
secondary = startSecondaryNameNode(conf);
|
secondary = startSecondaryNameNode(conf);
|
||||||
secondary.doCheckpoint();
|
secondary.doCheckpoint();
|
||||||
|
|
||||||
|
MetricsRecordBuilder rb = getMetrics(NN_METRICS);
|
||||||
|
assertCounterGt("GetImageNumOps", 0, rb);
|
||||||
|
assertCounterGt("GetEditNumOps", 0, rb);
|
||||||
|
assertCounterGt("PutImageNumOps", 0, rb);
|
||||||
|
assertGaugeGt("GetImageAvgTime", 0.0, rb);
|
||||||
|
assertGaugeGt("GetEditAvgTime", 0.0, rb);
|
||||||
|
assertGaugeGt("PutImageAvgTime", 0.0, rb);
|
||||||
} finally {
|
} finally {
|
||||||
fileSys.close();
|
fileSys.close();
|
||||||
cleanup(secondary);
|
cleanup(secondary);
|
||||||
|
Loading…
Reference in New Issue
Block a user