YARN-10855. yarn logs cli fails to retrieve logs if any TFile is corrupt or empty. Contributed by Jim Brennan.

This commit is contained in:
zhuqi-lucas 2021-07-19 10:11:20 +08:00
parent 6ed7670a93
commit 0ac443b1f8
3 changed files with 43 additions and 22 deletions

View File

@ -396,7 +396,9 @@ public void testFetchFinishedApplictionLogs() throws Exception {
ContainerId containerId1 = ContainerId.newContainerId(appAttemptId1, 1); ContainerId containerId1 = ContainerId.newContainerId(appAttemptId1, 1);
ContainerId containerId2 = ContainerId.newContainerId(appAttemptId1, 2); ContainerId containerId2 = ContainerId.newContainerId(appAttemptId1, 2);
ContainerId containerId3 = ContainerId.newContainerId(appAttemptId2, 3); ContainerId containerId3 = ContainerId.newContainerId(appAttemptId2, 3);
ContainerId containerId4 = ContainerId.newContainerId(appAttemptId2, 4);
final NodeId nodeId = NodeId.newInstance("localhost", 1234); final NodeId nodeId = NodeId.newInstance("localhost", 1234);
final NodeId badNodeId = NodeId.newInstance("badhost", 5678);
// create local logs // create local logs
String rootLogDir = "target/LocalLogs"; String rootLogDir = "target/LocalLogs";
@ -449,6 +451,8 @@ public void testFetchFinishedApplictionLogs() throws Exception {
containerId2, path, fs); containerId2, path, fs);
uploadContainerLogIntoRemoteDir(ugi, conf, rootLogDirs, nodeId, uploadContainerLogIntoRemoteDir(ugi, conf, rootLogDirs, nodeId,
containerId3, path, fs); containerId3, path, fs);
uploadTruncatedTFileIntoRemoteDir(ugi, conf, badNodeId,
containerId4, fs);
YarnClient mockYarnClient = YarnClient mockYarnClient =
createMockYarnClient( createMockYarnClient(
@ -801,6 +805,17 @@ public ContainerReport getContainerReport(String containerIdStr)
"Invalid ContainerId specified")); "Invalid ContainerId specified"));
sysErrStream.reset(); sysErrStream.reset();
// We uploaded an empty log file for container4, so we should see a
// message indicating that no matching log file exists for container4.
exitCode =
cli.run(new String[] {"-applicationId", appId.toString(),
"-nodeAddress", badNodeId.toString(), "-containerId",
containerId4.toString()});
assertTrue(exitCode == -1);
assertTrue(sysErrStream.toString().contains(
"Can not find any log file matching the pattern"));
sysErrStream.reset();
fs.delete(new Path(remoteLogRootDir), true); fs.delete(new Path(remoteLogRootDir), true);
fs.delete(new Path(rootLogDir), true); fs.delete(new Path(rootLogDir), true);
} }
@ -1820,6 +1835,21 @@ private static void uploadEmptyContainerLogIntoRemoteDir(UserGroupInformation ug
} }
} }
// Creates a zero-length aggregated log file in the remote log directory for
// the given node/container, simulating a truncated or corrupt TFile that the
// logs CLI must be able to skip gracefully.
private static void uploadTruncatedTFileIntoRemoteDir(
    UserGroupInformation ugi, Configuration configuration,
    NodeId nodeId, ContainerId containerId,
    FileSystem fs) throws Exception {
  ApplicationId appId = containerId.getApplicationAttemptId()
      .getApplicationId();
  // Resolve the write-side file controller so the empty file lands exactly
  // where real aggregated logs for this app/node would be written.
  LogAggregationFileController writeController =
      new LogAggregationFileControllerFactory(configuration)
          .getFileControllerForWrite();
  Path remoteAppLogFile = writeController.getRemoteNodeLogFileForApp(
      appId, ugi.getCurrentUser().getShortUserName(), nodeId);
  // Create (overwriting if present) and immediately close, leaving the
  // file empty on purpose.
  fs.create(remoteAppLogFile, true).close();
}
private LogsCLI createCli() throws IOException, YarnException { private LogsCLI createCli() throws IOException, YarnException {
YarnClient mockYarnClient = YarnClient mockYarnClient =
createMockYarnClient(YarnApplicationState.FINISHED, createMockYarnClient(YarnApplicationState.FINISHED,

View File

@ -565,7 +565,7 @@ public void close() throws DSQuotaExceededException {
@Public @Public
@Evolving @Evolving
public static class LogReader { public static class LogReader implements AutoCloseable {
private final FSDataInputStream fsDataIStream; private final FSDataInputStream fsDataIStream;
private final TFile.Reader.Scanner scanner; private final TFile.Reader.Scanner scanner;

View File

@ -46,7 +46,6 @@
import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat;
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogKey; import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogKey;
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogReader; import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogReader;
import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogValue; import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogValue;
@ -193,10 +192,7 @@ public boolean readAggregatedLogs(ContainerLogsRequest logRequest,
if ((nodeId == null || nodeName.contains(LogAggregationUtils if ((nodeId == null || nodeName.contains(LogAggregationUtils
.getNodeString(nodeId))) && !nodeName.endsWith( .getNodeString(nodeId))) && !nodeName.endsWith(
LogAggregationUtils.TMP_FILE_SUFFIX)) { LogAggregationUtils.TMP_FILE_SUFFIX)) {
AggregatedLogFormat.LogReader reader = null; try (LogReader reader = new LogReader(conf, thisNodeFile.getPath())) {
try {
reader = new AggregatedLogFormat.LogReader(conf,
thisNodeFile.getPath());
DataInputStream valueStream; DataInputStream valueStream;
LogKey key = new LogKey(); LogKey key = new LogKey();
valueStream = reader.next(key); valueStream = reader.next(key);
@ -251,10 +247,10 @@ public boolean readAggregatedLogs(ContainerLogsRequest logRequest,
key = new LogKey(); key = new LogKey();
valueStream = reader.next(key); valueStream = reader.next(key);
} }
} finally { } catch (IOException ex) {
if (reader != null) { LOG.error("Skipping empty or corrupt file " +
reader.close(); thisNodeFile.getPath(), ex);
} continue; // skip empty or corrupt files
} }
} }
} }
@ -268,10 +264,7 @@ public Map<String, List<ContainerLogFileInfo>> getLogMetaFilesOfNode(
Map<String, List<ContainerLogFileInfo>> logMetaFiles = new HashMap<>(); Map<String, List<ContainerLogFileInfo>> logMetaFiles = new HashMap<>();
Path nodePath = currentNodeFile.getPath(); Path nodePath = currentNodeFile.getPath();
LogReader reader = try (LogReader reader = new LogReader(conf, nodePath)) {
new LogReader(conf,
nodePath);
try {
DataInputStream valueStream; DataInputStream valueStream;
LogKey key = new LogKey(); LogKey key = new LogKey();
valueStream = reader.next(key); valueStream = reader.next(key);
@ -286,8 +279,6 @@ public Map<String, List<ContainerLogFileInfo>> getLogMetaFilesOfNode(
key = new LogKey(); key = new LogKey();
valueStream = reader.next(key); valueStream = reader.next(key);
} }
} finally {
reader.close();
} }
return logMetaFiles; return logMetaFiles;
} }
@ -349,10 +340,8 @@ public List<ContainerLogMeta> readAggregatedLogsMeta(
} }
if (!thisNodeFile.getPath().getName() if (!thisNodeFile.getPath().getName()
.endsWith(LogAggregationUtils.TMP_FILE_SUFFIX)) { .endsWith(LogAggregationUtils.TMP_FILE_SUFFIX)) {
AggregatedLogFormat.LogReader reader = try (LogReader reader = new LogReader(conf,
new AggregatedLogFormat.LogReader(conf, thisNodeFile.getPath())) {
thisNodeFile.getPath());
try {
DataInputStream valueStream; DataInputStream valueStream;
LogKey key = new LogKey(); LogKey key = new LogKey();
valueStream = reader.next(key); valueStream = reader.next(key);
@ -383,8 +372,10 @@ public List<ContainerLogMeta> readAggregatedLogsMeta(
key = new LogKey(); key = new LogKey();
valueStream = reader.next(key); valueStream = reader.next(key);
} }
} finally { } catch (IOException ex) {
reader.close(); LOG.error("Skipping empty or corrupt file " +
thisNodeFile.getPath(), ex);
continue; // skip empty or corrupt files
} }
} }
} }