HADOOP-16769. LocalDirAllocator to provide diagnostics when file creation fails (#4896)

The patch provides detailed diagnostics when file creation fails in LocalDirAllocator: the DiskErrorException thrown by getLocalPathForWrite() now reports the requested size, the maximum capacity seen across the configured local directories, and, when one was caught, the underlying IOException as its cause.

Contributed by: Ashutosh Gupta
Ashutosh Gupta 2022-09-21 07:24:47 +01:00 committed by GitHub
parent 3af155ceeb
commit 683fa264ee
2 changed files with 39 additions and 4 deletions
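
As background (not part of the commit), a minimal sketch of how the richer diagnostics surface to a caller of LocalDirAllocator. The context key "demo.buffer.dirs", the directories, and the requested size are illustrative; real callers use keys such as "fs.s3a.buffer.dir":

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;

public class AllocatorDiagnosticsDemo {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Hypothetical context key listing comma-separated local directories.
    conf.set("demo.buffer.dirs", "/tmp/demo/0,/tmp/demo/1");
    LocalDirAllocator allocator = new LocalDirAllocator("demo.buffer.dirs");
    try {
      // Request far more space than any configured directory can provide.
      Path p = allocator.getLocalPathForWrite("scratch/block-0001",
          Long.MAX_VALUE - 1, conf);
      System.out.println("Allocated " + p);
    } catch (DiskErrorException e) {
      // With this patch the message reports the requested size and the largest
      // capacity seen across the directories; any IOException caught while
      // creating the path is attached as the cause.
      System.err.println(e.getMessage());
      if (e.getCause() != null) {
        e.getCause().printStackTrace();
      }
    }
  }
}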

LocalDirAllocator.java

@@ -396,6 +396,10 @@ public Path getLocalPathForWrite(String pathStr, long size,
       Context ctx = confChanged(conf);
       int numDirs = ctx.localDirs.length;
       int numDirsSearched = 0;
+      // Max capacity in any directory
+      long maxCapacity = 0;
+      String errorText = null;
+      IOException diskException = null;
       //remove the leading slash from the path (to make sure that the uri
       //resolution results in a valid path on the dir being checked)
       if (pathStr.startsWith("/")) {
@@ -444,9 +448,18 @@ public Path getLocalPathForWrite(String pathStr, long size,
         int dirNum = ctx.getAndIncrDirNumLastAccessed(randomInc);
         while (numDirsSearched < numDirs) {
           long capacity = ctx.dirDF[dirNum].getAvailable();
+          if (capacity > maxCapacity) {
+            maxCapacity = capacity;
+          }
           if (capacity > size) {
-            returnPath =
-                createPath(ctx.localDirs[dirNum], pathStr, checkWrite);
+            try {
+              returnPath = createPath(ctx.localDirs[dirNum], pathStr,
+                  checkWrite);
+            } catch (IOException e) {
+              errorText = e.getMessage();
+              diskException = e;
+              LOG.debug("DiskException caught for dir {}", ctx.localDirs[dirNum], e);
+            }
             if (returnPath != null) {
               ctx.getAndIncrDirNumLastAccessed(numDirsSearched);
               break;
@@ -462,8 +475,13 @@ public Path getLocalPathForWrite(String pathStr, long size,
       }
       //no path found
-      throw new DiskErrorException("Could not find any valid local " +
-          "directory for " + pathStr);
+      String newErrorText = "Could not find any valid local directory for " +
+          pathStr + " with requested size " + size +
+          " as the max capacity in any directory is " + maxCapacity;
+      if (errorText != null) {
+        newErrorText = newErrorText + " due to " + errorText;
+      }
+      throw new DiskErrorException(newErrorText, diskException);
     }

     /** Creates a file on the local FS. Pass size as
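
With the change above, a failed allocation now fails with a message of roughly the following shape; the trailing "due to ..." clause appears only when an IOException was caught while creating the path, and the values below are illustrative rather than taken from a real run:

org.apache.hadoop.util.DiskChecker$DiskErrorException: Could not find any valid local directory for scratch/block-0001 with requested size 9223372036854775806 as the max capacity in any directory is 52428800 due to Permission denied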

TestLocalDirAllocator.java

@@ -26,6 +26,7 @@
 import java.util.NoSuchElementException;

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.test.LambdaTestUtils;
+import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.apache.hadoop.util.Shell;
@@ -532,4 +533,20 @@ public void testGetLocalPathForWriteForInvalidPaths() throws Exception {
     }
   }

+  /**
+   * Test to verify LocalDirAllocator log details to provide diagnostics when file creation fails.
+   *
+   * @throws Exception
+   */
+  @Test(timeout = 30000)
+  public void testGetLocalPathForWriteForLessSpace() throws Exception {
+    String dir0 = buildBufferDir(ROOT, 0);
+    String dir1 = buildBufferDir(ROOT, 1);
+    conf.set(CONTEXT, dir0 + "," + dir1);
+    LambdaTestUtils.intercept(DiskErrorException.class,
+        String.format("Could not find any valid local directory for %s with requested size %s",
+            "p1/x", Long.MAX_VALUE - 1), "Expect a DiskErrorException.",
+        () -> dirAllocator.getLocalPathForWrite("p1/x", Long.MAX_VALUE - 1, conf));
+  }
 }
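
The new test leans on LambdaTestUtils.intercept, which invokes the closure and asserts that it throws the named exception type with the given text somewhere in its string form. For readers unfamiliar with the helper, a rough plain-JUnit equivalent (assuming the test class's usual static imports of fail and assertTrue from org.junit.Assert):

    try {
      dirAllocator.getLocalPathForWrite("p1/x", Long.MAX_VALUE - 1, conf);
      fail("Expect a DiskErrorException.");
    } catch (DiskErrorException e) {
      assertTrue(e.getMessage().contains(
          "Could not find any valid local directory for p1/x with requested size "
              + (Long.MAX_VALUE - 1)));
    }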