HADOOP-18636 LocalDirAllocator cannot recover from directory tree deletion (#5412)
Even though DiskChecker.mkdirsWithExistsCheck() will create the directory tree, it is only called *after* the enumeration of directories with available space has completed. Directories which don't exist are reported as having 0 bytes of space, so the mkdirs code is never reached. Adding a simple mkdirs(), without checking its outcome, ensures that a directory which has been deleted is recreated if possible. If it cannot be recreated, it will still report 0 bytes of available space and so be excluded from the allocation.

Contributed by Steve Loughran
parent a3b0135ea6
commit ee71318d72
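For context, here is a minimal sketch of the failure scenario this commit addresses, written against the public LocalDirAllocator API; it is not part of the patch. The configuration key demo.buffer.dir and the /tmp paths are invented for illustration, since LocalDirAllocator resolves directories from whatever context property it is handed.

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;

public class DirRecoveryDemo {
  public static void main(String[] args) throws Exception {
    // context property naming the buffer directory (hypothetical key for this demo)
    final String key = "demo.buffer.dir";
    Configuration conf = new Configuration();
    conf.set(key, "/tmp/demo/subdir1/subdir2");

    LocalDirAllocator allocator = new LocalDirAllocator(key);

    // first allocation creates the directory tree and succeeds;
    // size -1 means "unknown", which takes the roulette-wheel path patched below
    Path first = allocator.getLocalPathForWrite("file1", -1, conf);
    System.out.println("first  = " + first);

    // delete an ancestor of the buffer directory behind the allocator's back
    FileUtil.fullyDelete(new File("/tmp/demo/subdir1"));

    // before this fix the deleted directory reported 0 bytes available and was
    // excluded, so this call failed with a DiskErrorException; with the fix the
    // tree is recreated during enumeration and a usable path comes back
    Path second = allocator.getLocalPathForWrite("file2", -1, conf);
    System.out.println("second = " + second);
  }
}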
LocalDirAllocator.java
@@ -414,7 +414,14 @@ public Path getLocalPathForWrite(String pathStr, long size,
 
         //build the "roulette wheel"
         for(int i =0; i < ctx.dirDF.length; ++i) {
-          availableOnDisk[i] = ctx.dirDF[i].getAvailable();
+          final DF target = ctx.dirDF[i];
+          // attempt to recreate the dir so that getAvailable() is valid
+          // if it fails, getAvailable() will return 0, so the dir will
+          // be declared unavailable.
+          // return value is logged at debug to keep spotbugs quiet.
+          final boolean b = new File(target.getDirPath()).mkdirs();
+          LOG.debug("mkdirs of {}={}", target, b);
+          availableOnDisk[i] = target.getAvailable();
           totalAvailable += availableOnDisk[i];
         }
 
TestLocalDirAllocator.java
@@ -548,5 +548,24 @@ public void testGetLocalPathForWriteForLessSpace() throws Exception {
         "p1/x", Long.MAX_VALUE - 1), "Expect a DiskErrorException.",
         () -> dirAllocator.getLocalPathForWrite("p1/x", Long.MAX_VALUE - 1, conf));
   }
+
+  /**
+   * Test for HADOOP-18636 LocalDirAllocator cannot recover from directory tree deletion.
+   */
+  @Test(timeout = 30000)
+  public void testDirectoryRecovery() throws Throwable {
+    String dir0 = buildBufferDir(ROOT, 0);
+    String subdir = dir0 + "/subdir1/subdir2";
+
+    conf.set(CONTEXT, subdir);
+    // get local path and an ancestor
+    final Path pathForWrite = dirAllocator.getLocalPathForWrite("file", -1, conf);
+    final Path ancestor = pathForWrite.getParent().getParent();
+
+    // delete that ancestor
+    localFs.delete(ancestor, true);
+    // and expect to get a new file back
+    dirAllocator.getLocalPathForWrite("file2", -1, conf);
+  }
 
 }