HADOOP-16769. LocalDirAllocator to provide diagnostics when file creation fails (#4842)
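This patch makes LocalDirAllocator report detailed diagnostics when file creation fails: the DiskErrorException message now includes the requested size, the maximum capacity available in any directory and, when path creation threw, the message of the most recently caught IOException, which is also attached as the cause. Contributed by: Ashutosh Gupta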
This commit is contained in:
parent
43c1ebae16
commit
0f03299eba
@ -396,6 +396,10 @@ public Path getLocalPathForWrite(String pathStr, long size,
|
|||||||
Context ctx = confChanged(conf);
|
Context ctx = confChanged(conf);
|
||||||
int numDirs = ctx.localDirs.length;
|
int numDirs = ctx.localDirs.length;
|
||||||
int numDirsSearched = 0;
|
int numDirsSearched = 0;
|
||||||
|
// Max capacity in any directory
|
||||||
|
long maxCapacity = 0;
|
||||||
|
String errorText = null;
|
||||||
|
IOException diskException = null;
|
||||||
//remove the leading slash from the path (to make sure that the uri
|
//remove the leading slash from the path (to make sure that the uri
|
||||||
//resolution results in a valid path on the dir being checked)
|
//resolution results in a valid path on the dir being checked)
|
||||||
if (pathStr.startsWith("/")) {
|
if (pathStr.startsWith("/")) {
|
||||||
@ -444,9 +448,18 @@ public Path getLocalPathForWrite(String pathStr, long size,
|
|||||||
int dirNum = ctx.getAndIncrDirNumLastAccessed(randomInc);
|
int dirNum = ctx.getAndIncrDirNumLastAccessed(randomInc);
|
||||||
while (numDirsSearched < numDirs) {
|
while (numDirsSearched < numDirs) {
|
||||||
long capacity = ctx.dirDF[dirNum].getAvailable();
|
long capacity = ctx.dirDF[dirNum].getAvailable();
|
||||||
|
if (capacity > maxCapacity) {
|
||||||
|
maxCapacity = capacity;
|
||||||
|
}
|
||||||
if (capacity > size) {
|
if (capacity > size) {
|
||||||
returnPath =
|
try {
|
||||||
createPath(ctx.localDirs[dirNum], pathStr, checkWrite);
|
returnPath = createPath(ctx.localDirs[dirNum], pathStr,
|
||||||
|
checkWrite);
|
||||||
|
} catch (IOException e) {
|
||||||
|
errorText = e.getMessage();
|
||||||
|
diskException = e;
|
||||||
|
LOG.debug("DiskException caught for dir {}", ctx.localDirs[dirNum], e);
|
||||||
|
}
|
||||||
if (returnPath != null) {
|
if (returnPath != null) {
|
||||||
ctx.getAndIncrDirNumLastAccessed(numDirsSearched);
|
ctx.getAndIncrDirNumLastAccessed(numDirsSearched);
|
||||||
break;
|
break;
|
||||||
@ -462,8 +475,13 @@ public Path getLocalPathForWrite(String pathStr, long size,
|
|||||||
}
|
}
|
||||||
|
|
||||||
//no path found
|
//no path found
|
||||||
throw new DiskErrorException("Could not find any valid local " +
|
String newErrorText = "Could not find any valid local directory for " +
|
||||||
"directory for " + pathStr);
|
pathStr + " with requested size " + size +
|
||||||
|
" as the max capacity in any directory is " + maxCapacity;
|
||||||
|
if (errorText != null) {
|
||||||
|
newErrorText = newErrorText + " due to " + errorText;
|
||||||
|
}
|
||||||
|
throw new DiskErrorException(newErrorText, diskException);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Creates a file on the local FS. Pass size as
|
/** Creates a file on the local FS. Pass size as
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
import java.util.NoSuchElementException;
|
import java.util.NoSuchElementException;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.test.LambdaTestUtils;
|
||||||
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
|
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
|
||||||
import org.apache.hadoop.util.Shell;
|
import org.apache.hadoop.util.Shell;
|
||||||
|
|
||||||
@ -532,4 +533,20 @@ public void testGetLocalPathForWriteForInvalidPaths() throws Exception {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verifies that LocalDirAllocator reports diagnostics (the path and the requested size) in the DiskErrorException thrown when no directory can satisfy a write request.
|
||||||
|
*
|
||||||
|
* @throws Exception if the expected DiskErrorException is not raised or the test fails unexpectedly.
|
||||||
|
*/
|
||||||
|
@Test(timeout = 30000)
|
||||||
|
public void testGetLocalPathForWriteForLessSpace() throws Exception {
|
||||||
|
String dir0 = buildBufferDir(ROOT, 0);
|
||||||
|
String dir1 = buildBufferDir(ROOT, 1);
|
||||||
|
conf.set(CONTEXT, dir0 + "," + dir1);
|
||||||
|
LambdaTestUtils.intercept(DiskErrorException.class,
|
||||||
|
String.format("Could not find any valid local directory for %s with requested size %s",
|
||||||
|
"p1/x", Long.MAX_VALUE - 1), "Expect a DiskErrorException.",
|
||||||
|
() -> dirAllocator.getLocalPathForWrite("p1/x", Long.MAX_VALUE - 1, conf));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user