From 4feef863721ba88c9cbf4557502e2082dfca7c40 Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Thu, 22 Mar 2012 21:11:18 +0000 Subject: [PATCH] HDFS-3044. fsck move should be non-destructive by default. Contributed by Colin Patrick McCabe git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1304063 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../hdfs/server/namenode/NamenodeFsck.java | 66 +++++++++++-------- .../hadoop/hdfs/server/namenode/TestFsck.java | 20 +++++- 3 files changed, 57 insertions(+), 32 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index e908f76f55..ccf9a2bc9f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -252,6 +252,9 @@ Release 0.23.3 - UNRELEASED HDFS-3086. Change Datanode not to send storage list in registration. (szetszwo) + HDFS-3044. fsck move should be non-destructive by default. + (Colin Patrick McCabe via eli) + OPTIMIZATIONS HDFS-3024. Improve performance of stringification in addStoredBlock (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java index 0d74c8d54a..8763c93e3b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java @@ -85,13 +85,6 @@ public class NamenodeFsck { public static final String NONEXISTENT_STATUS = "does not exist"; public static final String FAILURE_STATUS = "FAILED"; - /** Don't attempt any fixing . */ - public static final int FIXING_NONE = 0; - /** Move corrupted files to /lost+found . */ - public static final int FIXING_MOVE = 1; - /** Delete corrupted files. 
*/ - public static final int FIXING_DELETE = 2; - private final NameNode namenode; private final NetworkTopology networktopology; private final int totalDatanodes; @@ -107,7 +100,21 @@ public class NamenodeFsck { private boolean showLocations = false; private boolean showRacks = false; private boolean showCorruptFileBlocks = false; - private int fixing = FIXING_NONE; + + /** + * True if the user specified the -move option. + * + * When this option is in effect, we will copy salvaged blocks into the lost + * and found. */ + private boolean doMove = false; + + /** + * True if the user specified the -delete option. + * + * When this option is in effect, we will delete corrupted files. + */ + private boolean doDelete = false; + private String path = "/"; // We return back N files that are corrupt; the list of files returned is @@ -144,8 +151,8 @@ public class NamenodeFsck { for (Iterator it = pmap.keySet().iterator(); it.hasNext();) { String key = it.next(); if (key.equals("path")) { this.path = pmap.get("path")[0]; } - else if (key.equals("move")) { this.fixing = FIXING_MOVE; } - else if (key.equals("delete")) { this.fixing = FIXING_DELETE; } + else if (key.equals("move")) { this.doMove = true; } + else if (key.equals("delete")) { this.doDelete = true; } else if (key.equals("files")) { this.showFiles = true; } else if (key.equals("blocks")) { this.showBlocks = true; } else if (key.equals("locations")) { this.showLocations = true; } @@ -377,16 +384,20 @@ private void check(String parent, HdfsFileStatus file, Result res) throws IOExce + " blocks of total size " + missize + " B."); } res.corruptFiles++; - switch(fixing) { - case FIXING_NONE: - break; - case FIXING_MOVE: - if (!isOpen) - lostFoundMove(parent, file, blocks); - break; - case FIXING_DELETE: - if (!isOpen) - namenode.getRpcServer().delete(path, true); + try { + if (doMove) { + if (!isOpen) { + copyBlocksToLostFound(parent, file, blocks); + } + } + if (doDelete) { + if (!isOpen) { + LOG.warn("\n - deleting 
corrupted file " + path); + namenode.getRpcServer().delete(path, true); + } + } + } catch (IOException e) { + LOG.error("error processing " + path + ": " + e.toString()); } } if (showFiles) { @@ -401,8 +412,8 @@ private void check(String parent, HdfsFileStatus file, Result res) throws IOExce } } - private void lostFoundMove(String parent, HdfsFileStatus file, LocatedBlocks blocks) - throws IOException { + private void copyBlocksToLostFound(String parent, HdfsFileStatus file, + LocatedBlocks blocks) throws IOException { final DFSClient dfs = new DFSClient(NameNode.getAddress(conf), conf); try { if (!lfInited) { @@ -436,12 +447,10 @@ private void lostFoundMove(String parent, HdfsFileStatus file, LocatedBlocks blo } if (fos == null) { fos = dfs.create(target + "/" + chain, true); - if (fos != null) chain++; + if (fos != null) + chain++; else { - LOG.warn(errmsg + ": could not store chain " + chain); - // perhaps we should bail out here... - // return; - continue; + throw new IOException(errmsg + ": could not store chain " + chain); } } @@ -458,8 +467,7 @@ private void lostFoundMove(String parent, HdfsFileStatus file, LocatedBlocks blo } } if (fos != null) fos.close(); - LOG.warn("\n - moved corrupted file " + fullName + " to /lost+found"); - dfs.delete(fullName, true); + LOG.warn("\n - copied corrupted file " + fullName + " to /lost+found"); } catch (Exception e) { e.printStackTrace(); LOG.warn(errmsg + ": " + e.getMessage()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java index a2dc8f45db..387f72fe52 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java @@ -227,7 +227,7 @@ public Object run() throws Exception { } } - public void testFsckMove() 
throws Exception { + public void testFsckMoveAndDelete() throws Exception { DFSTestUtil util = new DFSTestUtil("TestFsck", 5, 3, 8*1024); MiniDFSCluster cluster = null; FileSystem fs = null; @@ -248,8 +248,9 @@ public void testFsckMove() throws Exception { String[] fileNames = util.getFileNames(topDir); DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost", cluster.getNameNodePort()), conf); + String corruptFileName = fileNames[0]; ExtendedBlock block = dfsClient.getNamenode().getBlockLocations( - fileNames[0], 0, Long.MAX_VALUE).get(0).getBlock(); + corruptFileName, 0, Long.MAX_VALUE).get(0).getBlock(); for (int i=0; i<4; i++) { File blockFile = MiniDFSCluster.getBlockFile(i, block); if(blockFile != null && blockFile.exists()) { @@ -267,8 +268,21 @@ public void testFsckMove() throws Exception { outStr = runFsck(conf, 1, false, "/"); } + // After a fsck -move, the corrupted file should still exist. + outStr = runFsck(conf, 1, true, "/", "-move" ); + assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS)); + String[] newFileNames = util.getFileNames(topDir); + boolean found = false; + for (String f : newFileNames) { + if (f.equals(corruptFileName)) { + found = true; + break; + } + } + assertTrue(found); + // Fix the filesystem by moving corrupted files to lost+found - outStr = runFsck(conf, 1, true, "/", "-move"); + outStr = runFsck(conf, 1, true, "/", "-move", "-delete"); assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS)); // Check to make sure we have healthy filesystem