From e452163a06daa6bbebc571127754962d8776a925 Mon Sep 17 00:00:00 2001 From: Chen Liang Date: Mon, 18 May 2020 10:58:52 -0700 Subject: [PATCH] HDFS-15293. Relax the condition for accepting a fsimage when receiving a checkpoint. Contributed by Chen Liang (cherry picked from commit 7bb902bc0d0c62d63a8960db444de3abb0a6ad22) --- .../hdfs/server/namenode/ImageServlet.java | 39 ++++++++++---- .../hdfs/server/namenode/TestCheckpoint.java | 53 ++++++++++++++++++- 2 files changed, 81 insertions(+), 11 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java index 91f24dd113..a9c2a09ed4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageServlet.java @@ -99,6 +99,19 @@ public class ImageServlet extends HttpServlet { "recent.image.check.enabled"; public static final boolean RECENT_IMAGE_CHECK_ENABLED_DEFAULT = true; + /* + * Specify a relaxation for the time delta check, the relaxation is to account + * for the scenario that there are chances that minor time difference (e.g. + * due to image upload delay, or minor machine clock skew) can cause ANN to + * reject a fsImage too aggressively. + */ + private static double recentImageCheckTimePrecision = 0.75; + + @VisibleForTesting + static void setRecentImageCheckTimePrecision(double ratio) { + recentImageCheckTimePrecision = ratio; + } + @Override public void doGet(final HttpServletRequest request, final HttpServletResponse response) throws ServletException, IOException { @@ -592,6 +605,9 @@ public Void run() throws Exception { long checkpointPeriod = conf.getTimeDuration(DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT, TimeUnit.SECONDS); + checkpointPeriod = Math.round( + checkpointPeriod * recentImageCheckTimePrecision); + long checkpointTxnCount = conf.getLong(DFS_NAMENODE_CHECKPOINT_TXNS_KEY, DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT); @@ -612,21 +628,24 @@ public Void run() throws Exception { // a new fsImage // 1. most recent image's txid is too far behind // 2. last checkpoint time was too old - response.sendError(HttpServletResponse.SC_CONFLICT, - "Most recent checkpoint is neither too far behind in " - + "txid, nor too old. New txnid cnt is " - + (txid - lastCheckpointTxid) - + ", expecting at least " + checkpointTxnCount - + " unless too long since last upload."); + String message = "Rejecting a fsimage due to small time delta " + + "and txnid delta. Time since previous checkpoint is " + + timeDelta + " expecting at least " + checkpointPeriod + + " txnid delta since previous checkpoint is " + + (txid - lastCheckpointTxid) + " expecting at least " + + checkpointTxnCount; + LOG.info(message); + response.sendError(HttpServletResponse.SC_CONFLICT, message); return null; } try { if (nnImage.getStorage().findImageFile(nnf, txid) != null) { - response.sendError(HttpServletResponse.SC_CONFLICT, - "Either current namenode has checkpointed or " - + "another checkpointer already uploaded an " - + "checkpoint for txid " + txid); + String message = "Either current namenode has checkpointed or " + + "another checkpointer already uploaded an " + + "checkpoint for txid " + txid; + LOG.info(message); + response.sendError(HttpServletResponse.SC_CONFLICT, message); return null; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index 572ad8bef7..fabe8c5cf0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -2464,7 +2464,7 @@ public void testLegacyOivImage() throws Exception { } @Test(timeout = 300000) - public void testActiveRejectSmallerDeltaImage() throws Exception { + public void testActiveRejectSmallerTxidDeltaImage() throws Exception { MiniDFSCluster cluster = null; Configuration conf = new HdfsConfiguration(); // Set the delta txid threshold to 10 @@ -2517,6 +2517,57 @@ public void testActiveRejectSmallerDeltaImage() throws Exception { } } + /** + * Test that even with txid and time delta threshold, by having time + * relaxation, SBN can still upload images to ANN. + * + * @throws Exception + */ + @Test + public void testActiveImageWithTimeDeltaRelaxation() throws Exception { + Configuration conf = new HdfsConfiguration(); + // Set the delta txid threshold to some arbitrarily large value, so + // it does not trigger a checkpoint during this test. + conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 1000000); + // Set the delta time threshold to some arbitrarily large value, so + // it does not trigger a checkpoint during this test. + conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 900000); + // Set relaxation to 0, means time delta = 0 from previous image is fine, + // this will effectively disable reject small delta image + ImageServlet.setRecentImageCheckTimePrecision(0); + + SecondaryNameNode secondary = null; + + try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(0).format(true).build()) { + // enable small delta rejection + NameNode active = cluster.getNameNode(); + active.httpServer.getHttpServer() + .setAttribute(RECENT_IMAGE_CHECK_ENABLED, true); + + secondary = startSecondaryNameNode(conf); + + FileSystem fs = cluster.getFileSystem(); + assertEquals(0, active.getNamesystem().getFSImage() + .getMostRecentCheckpointTxId()); + + // create 5 dir. + for (int i = 0; i < 5; i++) { + fs.mkdirs(new Path("dir-" + i)); + } + + // Checkpoint 1st + secondary.doCheckpoint(); + // at this point, despite this is a small delta change, w.r.t both + // txid and time delta, due to we set relaxation to 0, this image + // still gets accepted + assertEquals(9, active.getNamesystem().getFSImage() + .getMostRecentCheckpointTxId()); + } finally { + cleanup(secondary); + } + } + private static void cleanup(SecondaryNameNode snn) { if (snn != null) { try {