YARN-10855. yarn logs cli fails to retrieve logs if any TFile is corrupt or empty. Contributed by Jim Brennan.
This commit is contained in:
parent
6ed7670a93
commit
0ac443b1f8
@ -396,7 +396,9 @@ public void testFetchFinishedApplictionLogs() throws Exception {
|
|||||||
ContainerId containerId1 = ContainerId.newContainerId(appAttemptId1, 1);
|
ContainerId containerId1 = ContainerId.newContainerId(appAttemptId1, 1);
|
||||||
ContainerId containerId2 = ContainerId.newContainerId(appAttemptId1, 2);
|
ContainerId containerId2 = ContainerId.newContainerId(appAttemptId1, 2);
|
||||||
ContainerId containerId3 = ContainerId.newContainerId(appAttemptId2, 3);
|
ContainerId containerId3 = ContainerId.newContainerId(appAttemptId2, 3);
|
||||||
|
ContainerId containerId4 = ContainerId.newContainerId(appAttemptId2, 4);
|
||||||
final NodeId nodeId = NodeId.newInstance("localhost", 1234);
|
final NodeId nodeId = NodeId.newInstance("localhost", 1234);
|
||||||
|
final NodeId badNodeId = NodeId.newInstance("badhost", 5678);
|
||||||
|
|
||||||
// create local logs
|
// create local logs
|
||||||
String rootLogDir = "target/LocalLogs";
|
String rootLogDir = "target/LocalLogs";
|
||||||
@ -449,6 +451,8 @@ public void testFetchFinishedApplictionLogs() throws Exception {
|
|||||||
containerId2, path, fs);
|
containerId2, path, fs);
|
||||||
uploadContainerLogIntoRemoteDir(ugi, conf, rootLogDirs, nodeId,
|
uploadContainerLogIntoRemoteDir(ugi, conf, rootLogDirs, nodeId,
|
||||||
containerId3, path, fs);
|
containerId3, path, fs);
|
||||||
|
uploadTruncatedTFileIntoRemoteDir(ugi, conf, badNodeId,
|
||||||
|
containerId4, fs);
|
||||||
|
|
||||||
YarnClient mockYarnClient =
|
YarnClient mockYarnClient =
|
||||||
createMockYarnClient(
|
createMockYarnClient(
|
||||||
@ -801,6 +805,17 @@ public ContainerReport getContainerReport(String containerIdStr)
|
|||||||
"Invalid ContainerId specified"));
|
"Invalid ContainerId specified"));
|
||||||
sysErrStream.reset();
|
sysErrStream.reset();
|
||||||
|
|
||||||
|
// Uploaded the empty log for container4. We should see a message
|
||||||
|
// showing the log for container4 is not present.
|
||||||
|
exitCode =
|
||||||
|
cli.run(new String[] {"-applicationId", appId.toString(),
|
||||||
|
"-nodeAddress", badNodeId.toString(), "-containerId",
|
||||||
|
containerId4.toString()});
|
||||||
|
assertTrue(exitCode == -1);
|
||||||
|
assertTrue(sysErrStream.toString().contains(
|
||||||
|
"Can not find any log file matching the pattern"));
|
||||||
|
sysErrStream.reset();
|
||||||
|
|
||||||
fs.delete(new Path(remoteLogRootDir), true);
|
fs.delete(new Path(remoteLogRootDir), true);
|
||||||
fs.delete(new Path(rootLogDir), true);
|
fs.delete(new Path(rootLogDir), true);
|
||||||
}
|
}
|
||||||
@ -1820,6 +1835,21 @@ private static void uploadEmptyContainerLogIntoRemoteDir(UserGroupInformation ug
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Test helper: creates a zero-length aggregated-log file in the remote log
 * directory so that readers encounter an empty (truncated) file.
 *
 * The target path is obtained from the write-side log aggregation file
 * controller for the application that owns {@code containerId}, the
 * current user's short name, and {@code nodeId}. Nothing is written to
 * the file; the stream is closed immediately after creation.
 *
 * @param ugi used to look up the short user name for the remote path
 * @param configuration configuration passed to the
 *     LogAggregationFileControllerFactory
 * @param nodeId node whose remote log file is created
 * @param containerId only its application id is used to build the path
 * @param fs file system on which the empty file is created
 * @throws Exception if the path cannot be resolved or the file cannot be
 *     created
 */
private static void uploadTruncatedTFileIntoRemoteDir(
|
||||||
|
UserGroupInformation ugi, Configuration configuration,
|
||||||
|
NodeId nodeId, ContainerId containerId,
|
||||||
|
FileSystem fs) throws Exception {
|
||||||
|
LogAggregationFileControllerFactory factory
|
||||||
|
= new LogAggregationFileControllerFactory(configuration);
|
||||||
|
LogAggregationFileController fileFormat = factory
|
||||||
|
.getFileControllerForWrite();
|
||||||
|
ApplicationId appId = containerId.getApplicationAttemptId()
|
||||||
|
.getApplicationId();
|
||||||
|
Path path = fileFormat.getRemoteNodeLogFileForApp(
|
||||||
|
// NOTE(review): getCurrentUser() looks like the static
|
||||||
// UserGroupInformation method invoked through an instance; if so, the
// name comes from the process's current user, not necessarily from
// ugi -- confirm against the Hadoop API.
appId, ugi.getCurrentUser().getShortUserName(), nodeId);
|
||||||
|
// Create the file and close it without writing anything, leaving an
// empty file; the second argument is presumably the overwrite flag of
// FileSystem.create(Path, boolean) -- confirm.
fs.create(path, true).close();
|
||||||
|
}
|
||||||
|
|
||||||
private LogsCLI createCli() throws IOException, YarnException {
|
private LogsCLI createCli() throws IOException, YarnException {
|
||||||
YarnClient mockYarnClient =
|
YarnClient mockYarnClient =
|
||||||
createMockYarnClient(YarnApplicationState.FINISHED,
|
createMockYarnClient(YarnApplicationState.FINISHED,
|
||||||
|
@ -565,7 +565,7 @@ public void close() throws DSQuotaExceededException {
|
|||||||
|
|
||||||
@Public
|
@Public
|
||||||
@Evolving
|
@Evolving
|
||||||
public static class LogReader {
|
public static class LogReader implements AutoCloseable {
|
||||||
|
|
||||||
private final FSDataInputStream fsDataIStream;
|
private final FSDataInputStream fsDataIStream;
|
||||||
private final TFile.Reader.Scanner scanner;
|
private final TFile.Reader.Scanner scanner;
|
||||||
|
@ -46,7 +46,6 @@
|
|||||||
import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException;
|
import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat;
|
|
||||||
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogKey;
|
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogKey;
|
||||||
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogReader;
|
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogReader;
|
||||||
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogValue;
|
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogValue;
|
||||||
@ -193,10 +192,7 @@ public boolean readAggregatedLogs(ContainerLogsRequest logRequest,
|
|||||||
if ((nodeId == null || nodeName.contains(LogAggregationUtils
|
if ((nodeId == null || nodeName.contains(LogAggregationUtils
|
||||||
.getNodeString(nodeId))) && !nodeName.endsWith(
|
.getNodeString(nodeId))) && !nodeName.endsWith(
|
||||||
LogAggregationUtils.TMP_FILE_SUFFIX)) {
|
LogAggregationUtils.TMP_FILE_SUFFIX)) {
|
||||||
AggregatedLogFormat.LogReader reader = null;
|
try (LogReader reader = new LogReader(conf, thisNodeFile.getPath())) {
|
||||||
try {
|
|
||||||
reader = new AggregatedLogFormat.LogReader(conf,
|
|
||||||
thisNodeFile.getPath());
|
|
||||||
DataInputStream valueStream;
|
DataInputStream valueStream;
|
||||||
LogKey key = new LogKey();
|
LogKey key = new LogKey();
|
||||||
valueStream = reader.next(key);
|
valueStream = reader.next(key);
|
||||||
@ -251,10 +247,10 @@ public boolean readAggregatedLogs(ContainerLogsRequest logRequest,
|
|||||||
key = new LogKey();
|
key = new LogKey();
|
||||||
valueStream = reader.next(key);
|
valueStream = reader.next(key);
|
||||||
}
|
}
|
||||||
} finally {
|
} catch (IOException ex) {
|
||||||
if (reader != null) {
|
LOG.error("Skipping empty or corrupt file " +
|
||||||
reader.close();
|
thisNodeFile.getPath(), ex);
|
||||||
}
|
continue; // skip empty or corrupt files
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -268,10 +264,7 @@ public Map<String, List<ContainerLogFileInfo>> getLogMetaFilesOfNode(
|
|||||||
Map<String, List<ContainerLogFileInfo>> logMetaFiles = new HashMap<>();
|
Map<String, List<ContainerLogFileInfo>> logMetaFiles = new HashMap<>();
|
||||||
Path nodePath = currentNodeFile.getPath();
|
Path nodePath = currentNodeFile.getPath();
|
||||||
|
|
||||||
LogReader reader =
|
try (LogReader reader = new LogReader(conf, nodePath)) {
|
||||||
new LogReader(conf,
|
|
||||||
nodePath);
|
|
||||||
try {
|
|
||||||
DataInputStream valueStream;
|
DataInputStream valueStream;
|
||||||
LogKey key = new LogKey();
|
LogKey key = new LogKey();
|
||||||
valueStream = reader.next(key);
|
valueStream = reader.next(key);
|
||||||
@ -286,8 +279,6 @@ public Map<String, List<ContainerLogFileInfo>> getLogMetaFilesOfNode(
|
|||||||
key = new LogKey();
|
key = new LogKey();
|
||||||
valueStream = reader.next(key);
|
valueStream = reader.next(key);
|
||||||
}
|
}
|
||||||
} finally {
|
|
||||||
reader.close();
|
|
||||||
}
|
}
|
||||||
return logMetaFiles;
|
return logMetaFiles;
|
||||||
}
|
}
|
||||||
@ -349,10 +340,8 @@ public List<ContainerLogMeta> readAggregatedLogsMeta(
|
|||||||
}
|
}
|
||||||
if (!thisNodeFile.getPath().getName()
|
if (!thisNodeFile.getPath().getName()
|
||||||
.endsWith(LogAggregationUtils.TMP_FILE_SUFFIX)) {
|
.endsWith(LogAggregationUtils.TMP_FILE_SUFFIX)) {
|
||||||
AggregatedLogFormat.LogReader reader =
|
try (LogReader reader = new LogReader(conf,
|
||||||
new AggregatedLogFormat.LogReader(conf,
|
thisNodeFile.getPath())) {
|
||||||
thisNodeFile.getPath());
|
|
||||||
try {
|
|
||||||
DataInputStream valueStream;
|
DataInputStream valueStream;
|
||||||
LogKey key = new LogKey();
|
LogKey key = new LogKey();
|
||||||
valueStream = reader.next(key);
|
valueStream = reader.next(key);
|
||||||
@ -383,8 +372,10 @@ public List<ContainerLogMeta> readAggregatedLogsMeta(
|
|||||||
key = new LogKey();
|
key = new LogKey();
|
||||||
valueStream = reader.next(key);
|
valueStream = reader.next(key);
|
||||||
}
|
}
|
||||||
} finally {
|
} catch (IOException ex) {
|
||||||
reader.close();
|
LOG.error("Skipping empty or corrupt file " +
|
||||||
|
thisNodeFile.getPath(), ex);
|
||||||
|
continue; // skip empty or corrupt files
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user