HDFS-10282. The VolumeScanner should warn about replica files which are misplaced. Contributed by Colin Patrick McCabe.

2016-04-14 07:58:24 -05:00 · 2016-04-14 07:58:24 -05:00 · 0d1c1152f1
commit 0d1c1152f1
parent df18b6e984
6 changed files with 111 additions and 8 deletions
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
@ -914,13 +914,13 @@ private LinkedList<ScanInfo> compileReport(FsVolumeSpi vol,
     */
    private void verifyFileLocation(File actualBlockFile,
        File bpFinalizedDir, long blockId) {
-      File blockDir = DatanodeUtil.idToBlockDir(bpFinalizedDir, blockId);
-      if (actualBlockFile.getParentFile().compareTo(blockDir) != 0) {
-        File expBlockFile = new File(blockDir, actualBlockFile.getName());
-        LOG.warn("Block: " + blockId
-            + " has to be upgraded to block ID-based layout. "
-            + "Actual block file path: " + actualBlockFile
-            + ", expected block file path: " + expBlockFile);
+      File expectedBlockDir =
+          DatanodeUtil.idToBlockDir(bpFinalizedDir, blockId);
+      File actualBlockDir = actualBlockFile.getParentFile();
+      if (actualBlockDir.compareTo(expectedBlockDir) != 0) {
+        LOG.warn("Block: " + blockId +
+            " found in invalid directory.  Expected directory: " +
+            expectedBlockDir + ".  Actual directory: " + actualBlockDir);
      }
    }
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/VolumeScanner.java
@ -414,7 +414,7 @@ private long scanBlock(ExtendedBlock cblock, long bytesPerSec) {
      Block b = volume.getDataset().getStoredBlock(
          cblock.getBlockPoolId(), cblock.getBlockId());
      if (b == null) {
-        LOG.info("FileNotFound while finding block {} on volume {}",
+        LOG.info("Replica {} was not found in the VolumeMap for volume {}",
            cblock, volume.getBasePath());
      } else {
        block = new ExtendedBlock(cblock.getBlockPoolId(), b);
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
@ -697,6 +697,18 @@ public ExtendedBlock nextBlock() throws IOException {
            } else {
              ExtendedBlock block =
                  new ExtendedBlock(bpid, Block.filename2id(state.curEntry));
              File expectedBlockDir = DatanodeUtil.idToBlockDir(
                  new File("."), block.getBlockId());
              File actualBlockDir = Paths.get(".",
                  state.curFinalizedDir, state.curFinalizedSubDir).toFile();
              if (!expectedBlockDir.equals(actualBlockDir)) {
                LOG.error("nextBlock({}, {}): block id {} found in invalid " +
                    "directory.  Expected directory: {}.  " +
                    "Actual directory: {}", storageID, bpid,
                    block.getBlockId(), expectedBlockDir.getPath(),
                    actualBlockDir.getPath());
                continue;
              }
              LOG.trace("nextBlock({}, {}): advancing to {}",
                  storageID, bpid, block);
              return block;
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/FsDatasetTestUtils.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/FsDatasetTestUtils.java
@ -135,6 +135,13 @@ interface MaterializedReplica {
     * @throws IOException I/O error.
     */
    void truncateMeta(long newSize) throws IOException;
    /**
     * Make the replica unreachable, perhaps by renaming it to an
     * invalid file name or by moving its files to a location where
     * they are not expected to be.  How this is achieved is left to
     * the implementation.
     *
     * @throws IOException On I/O error.
     */
    void makeUnreachable() throws IOException;
  }
  /**
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockScanner.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockScanner.java
@ -24,6 +24,7 @@
 import static org.apache.hadoop.hdfs.server.datanode.BlockScanner.Conf.INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertFalse;
 import java.io.Closeable;
 import java.io.File;
@ -38,6 +39,7 @@
 import com.google.common.base.Supplier;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.server.datanode.FsDatasetTestUtils.MaterializedReplica;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
 import org.apache.hadoop.hdfs.server.datanode.VolumeScanner.ScanResultHandler;
 import org.apache.hadoop.conf.Configuration;
@ -139,6 +141,11 @@ public ExtendedBlock getFileBlock(int nsIdx, int fileIdx)
          throws Exception {
      return DFSTestUtil.getFirstBlock(dfs[nsIdx], getPath(fileIdx));
    }
    /**
     * Look up the materialized replica backing a test file.
     *
     * The block is resolved with {@link #getFileBlock(int, int)}; the
     * replica is then requested from the cluster with a first argument
     * of 0 (presumably the DataNode index -- confirm against the
     * cluster API).
     *
     * @param nsIdx   index passed through to getFileBlock.
     * @param fileIdx index passed through to getFileBlock.
     * @return the replica object returned by the cluster.
     * @throws Exception if the block or the replica cannot be resolved.
     */
    public MaterializedReplica getMaterializedReplica(int nsIdx, int fileIdx)
        throws Exception {
      final ExtendedBlock fileBlock = getFileBlock(nsIdx, fileIdx);
      return cluster.getMaterializedReplica(0, fileBlock);
    }
  }
  /**
@ -806,4 +813,60 @@ public Boolean get() {
      info.blocksScanned = 0;
    }
  }
  /**
   * Test that blocks which are in the wrong location are ignored.
   *
   * Creates NUM_FILES files, makes the replica of one of them
   * unreachable, lets the scanner run, and then checks that exactly
   * the remaining NUM_FILES - 1 blocks were scanned as good while the
   * unreachable block was reported neither as good nor as bad.
   */
  @Test(timeout=120000)
  public void testIgnoreMisplacedBlock() throws Exception {
    Configuration conf = new Configuration();
    // Set a really long scan period.
    conf.setLong(DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
    conf.set(INTERNAL_VOLUME_SCANNER_SCAN_RESULT_HANDLER,
        TestScanResultHandler.class.getName());
    conf.setLong(INTERNAL_DFS_BLOCK_SCANNER_CURSOR_SAVE_INTERVAL_MS, 0L);
    // NOTE(review): ctx is never closed in this test.  If TestContext
    // holds resources that need an explicit close(), confirm whether
    // one should be added at the end.
    final TestContext ctx = new TestContext(conf, 1);
    final int NUM_FILES = 4;
    ctx.createFiles(0, NUM_FILES, 5);
    // Misplace the replica of file 1 before the scanner is allowed to run.
    MaterializedReplica unreachableReplica = ctx.getMaterializedReplica(0, 1);
    ExtendedBlock unreachableBlock = ctx.getFileBlock(0, 1);
    unreachableReplica.makeUnreachable();
    final TestScanResultHandler.Info info =
        TestScanResultHandler.getInfo(ctx.volumes.get(0));
    synchronized (info) {
      info.sem = new Semaphore(NUM_FILES);
      info.shouldRun = true;
      info.notify();
    }
    // Wait until every reachable block (NUM_FILES - 1 of them) is scanned.
    LOG.info("Waiting for the blocks to be scanned.");
    GenericTestUtils.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        synchronized (info) {
          if (info.blocksScanned >= NUM_FILES - 1) {
            LOG.info("info = {}.  blockScanned has now reached {}",
                info, info.blocksScanned);
            return true;
          } else {
            LOG.info("info = {}.  Waiting for blockScanned to reach {}",
                info, NUM_FILES - 1);
            return false;
          }
        }
      }
    }, 50, 30000);
    // Only the 3 reachable blocks should have been scanned; the
    // misplaced block must not show up as either good or bad.
    synchronized (info) {
      assertFalse(info.goodBlocks.contains(unreachableBlock));
      assertFalse(info.badBlocks.contains(unreachableBlock));
      assertEquals("Expected 3 good blocks.", 3, info.goodBlocks.size());
      info.goodBlocks.clear();
      assertEquals("Expected 3 blocksScanned", 3, info.blocksScanned);
      assertEquals("Did not expect bad blocks.", 0, info.badBlocks.size());
      info.blocksScanned = 0;
    }
    info.sem.release(1);
  }
 }
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImplTestUtils.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImplTestUtils.java
@ -170,6 +170,27 @@ public void truncateMeta(long newSize) throws IOException {
      truncate(metaFile, newSize);
    }
    /**
     * Move the block file and its metadata file into the directory
     * computed for a different block ID, so that they no longer sit in
     * the directory they were found in.  The file names are unchanged.
     *
     * @throws IOException if the target directory cannot be created or
     *                     either file cannot be moved.
     */
    @Override
    public void makeUnreachable() throws IOException {
      // Parse the ID from the bare file name rather than the absolute
      // path.  NOTE(review): Block.getBlockId presumably expects a
      // block/meta file name, not a full path -- confirm.
      long blockId = Block.getBlockId(blockFile.getName());
      File origDir = blockFile.getParentFile();
      // The root of the ID-based layout is two levels above the block.
      File root = origDir.getParentFile().getParentFile();
      File newDir;
      // Keep incrementing the block ID until it maps to a directory
      // other than the one the replica currently lives in.
      do {
        blockId++;
        newDir = DatanodeUtil.idToBlockDir(root, blockId);
      } while (origDir.equals(newDir));
      Files.createDirectories(newDir.toPath());
      Files.move(blockFile.toPath(),
          new File(newDir, blockFile.getName()).toPath());
      Files.move(metaFile.toPath(),
          new File(newDir, metaFile.getName()).toPath());
    }
    @Override
    public String toString() {
      return String.format("MaterializedReplica: file=%s", blockFile);