HDFS-3044. fsck move should be non-destructive by default. Contributed by Colin Patrick McCabe

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1304063 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Eli Collins 2012-03-22 21:11:18 +00:00
parent a8ebdaeb08
commit 4feef86372
3 changed files with 57 additions and 32 deletions

View File

@ -252,6 +252,9 @@ Release 0.23.3 - UNRELEASED
HDFS-3086. Change Datanode not to send storage list in registration. HDFS-3086. Change Datanode not to send storage list in registration.
(szetszwo) (szetszwo)
HDFS-3044. fsck move should be non-destructive by default.
(Colin Patrick McCabe via eli)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-3024. Improve performance of stringification in addStoredBlock (todd) HDFS-3024. Improve performance of stringification in addStoredBlock (todd)

View File

@ -85,13 +85,6 @@ public class NamenodeFsck {
public static final String NONEXISTENT_STATUS = "does not exist"; public static final String NONEXISTENT_STATUS = "does not exist";
public static final String FAILURE_STATUS = "FAILED"; public static final String FAILURE_STATUS = "FAILED";
/** Don't attempt any fixing . */
public static final int FIXING_NONE = 0;
/** Move corrupted files to /lost+found . */
public static final int FIXING_MOVE = 1;
/** Delete corrupted files. */
public static final int FIXING_DELETE = 2;
private final NameNode namenode; private final NameNode namenode;
private final NetworkTopology networktopology; private final NetworkTopology networktopology;
private final int totalDatanodes; private final int totalDatanodes;
@ -107,7 +100,21 @@ public class NamenodeFsck {
private boolean showLocations = false; private boolean showLocations = false;
private boolean showRacks = false; private boolean showRacks = false;
private boolean showCorruptFileBlocks = false; private boolean showCorruptFileBlocks = false;
private int fixing = FIXING_NONE;
/**
* True if the user specified the -move option.
*
* When this option is in effect, we will copy salvaged blocks into the lost
* and found. */
private boolean doMove = false;
/**
* True if the user specified the -delete option.
*
* When this option is in effect, we will delete corrupted files.
*/
private boolean doDelete = false;
private String path = "/"; private String path = "/";
// We return back N files that are corrupt; the list of files returned is // We return back N files that are corrupt; the list of files returned is
@ -144,8 +151,8 @@ public class NamenodeFsck {
for (Iterator<String> it = pmap.keySet().iterator(); it.hasNext();) { for (Iterator<String> it = pmap.keySet().iterator(); it.hasNext();) {
String key = it.next(); String key = it.next();
if (key.equals("path")) { this.path = pmap.get("path")[0]; } if (key.equals("path")) { this.path = pmap.get("path")[0]; }
else if (key.equals("move")) { this.fixing = FIXING_MOVE; } else if (key.equals("move")) { this.doMove = true; }
else if (key.equals("delete")) { this.fixing = FIXING_DELETE; } else if (key.equals("delete")) { this.doDelete = true; }
else if (key.equals("files")) { this.showFiles = true; } else if (key.equals("files")) { this.showFiles = true; }
else if (key.equals("blocks")) { this.showBlocks = true; } else if (key.equals("blocks")) { this.showBlocks = true; }
else if (key.equals("locations")) { this.showLocations = true; } else if (key.equals("locations")) { this.showLocations = true; }
@ -377,18 +384,22 @@ private void check(String parent, HdfsFileStatus file, Result res) throws IOExce
+ " blocks of total size " + missize + " B."); + " blocks of total size " + missize + " B.");
} }
res.corruptFiles++; res.corruptFiles++;
switch(fixing) { try {
case FIXING_NONE: if (doMove) {
break; if (!isOpen) {
case FIXING_MOVE: copyBlocksToLostFound(parent, file, blocks);
if (!isOpen) }
lostFoundMove(parent, file, blocks); }
break; if (doDelete) {
case FIXING_DELETE: if (!isOpen) {
if (!isOpen) LOG.warn("\n - deleting corrupted file " + path);
namenode.getRpcServer().delete(path, true); namenode.getRpcServer().delete(path, true);
} }
} }
} catch (IOException e) {
LOG.error("error processing " + path + ": " + e.toString());
}
}
if (showFiles) { if (showFiles) {
if (missing > 0) { if (missing > 0) {
out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n"); out.print(" MISSING " + missing + " blocks of total size " + missize + " B\n");
@ -401,8 +412,8 @@ private void check(String parent, HdfsFileStatus file, Result res) throws IOExce
} }
} }
private void lostFoundMove(String parent, HdfsFileStatus file, LocatedBlocks blocks) private void copyBlocksToLostFound(String parent, HdfsFileStatus file,
throws IOException { LocatedBlocks blocks) throws IOException {
final DFSClient dfs = new DFSClient(NameNode.getAddress(conf), conf); final DFSClient dfs = new DFSClient(NameNode.getAddress(conf), conf);
try { try {
if (!lfInited) { if (!lfInited) {
@ -436,12 +447,10 @@ private void lostFoundMove(String parent, HdfsFileStatus file, LocatedBlocks blo
} }
if (fos == null) { if (fos == null) {
fos = dfs.create(target + "/" + chain, true); fos = dfs.create(target + "/" + chain, true);
if (fos != null) chain++; if (fos != null)
chain++;
else { else {
LOG.warn(errmsg + ": could not store chain " + chain); throw new IOException(errmsg + ": could not store chain " + chain);
// perhaps we should bail out here...
// return;
continue;
} }
} }
@ -458,8 +467,7 @@ private void lostFoundMove(String parent, HdfsFileStatus file, LocatedBlocks blo
} }
} }
if (fos != null) fos.close(); if (fos != null) fos.close();
LOG.warn("\n - moved corrupted file " + fullName + " to /lost+found"); LOG.warn("\n - copied corrupted file " + fullName + " to /lost+found");
dfs.delete(fullName, true);
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
LOG.warn(errmsg + ": " + e.getMessage()); LOG.warn(errmsg + ": " + e.getMessage());

View File

@ -227,7 +227,7 @@ public Object run() throws Exception {
} }
} }
public void testFsckMove() throws Exception { public void testFsckMoveAndDelete() throws Exception {
DFSTestUtil util = new DFSTestUtil("TestFsck", 5, 3, 8*1024); DFSTestUtil util = new DFSTestUtil("TestFsck", 5, 3, 8*1024);
MiniDFSCluster cluster = null; MiniDFSCluster cluster = null;
FileSystem fs = null; FileSystem fs = null;
@ -248,8 +248,9 @@ public void testFsckMove() throws Exception {
String[] fileNames = util.getFileNames(topDir); String[] fileNames = util.getFileNames(topDir);
DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost", DFSClient dfsClient = new DFSClient(new InetSocketAddress("localhost",
cluster.getNameNodePort()), conf); cluster.getNameNodePort()), conf);
String corruptFileName = fileNames[0];
ExtendedBlock block = dfsClient.getNamenode().getBlockLocations( ExtendedBlock block = dfsClient.getNamenode().getBlockLocations(
fileNames[0], 0, Long.MAX_VALUE).get(0).getBlock(); corruptFileName, 0, Long.MAX_VALUE).get(0).getBlock();
for (int i=0; i<4; i++) { for (int i=0; i<4; i++) {
File blockFile = MiniDFSCluster.getBlockFile(i, block); File blockFile = MiniDFSCluster.getBlockFile(i, block);
if(blockFile != null && blockFile.exists()) { if(blockFile != null && blockFile.exists()) {
@ -267,9 +268,22 @@ public void testFsckMove() throws Exception {
outStr = runFsck(conf, 1, false, "/"); outStr = runFsck(conf, 1, false, "/");
} }
// Fix the filesystem by moving corrupted files to lost+found // After a fsck -move, the corrupted file should still exist.
outStr = runFsck(conf, 1, true, "/", "-move" ); outStr = runFsck(conf, 1, true, "/", "-move" );
assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS)); assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
String[] newFileNames = util.getFileNames(topDir);
boolean found = false;
for (String f : newFileNames) {
if (f.equals(corruptFileName)) {
found = true;
break;
}
}
assertTrue(found);
// Fix the filesystem by moving corrupted files to lost+found
outStr = runFsck(conf, 1, true, "/", "-move", "-delete");
assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
// Check to make sure we have healthy filesystem // Check to make sure we have healthy filesystem
outStr = runFsck(conf, 0, true, "/"); outStr = runFsck(conf, 0, true, "/");