MAPREDUCE-7494. File stream leak when LineRecordReader is interrupted (#7117)

Contributed by Davin Tjong
This commit is contained in:
Davin Tjong 2024-10-16 03:41:18 -07:00 committed by GitHub
parent 9321e322d2
commit 78a08b3b78
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 44 additions and 37 deletions

View File

@@ -302,6 +302,8 @@ public synchronized void close() throws IOException {
try { try {
if (in != null) { if (in != null) {
in.close(); in.close();
} else if (fileIn != null) {
fileIn.close();
} }
} finally { } finally {
if (decompressor != null) { if (decompressor != null) {

View File

@@ -99,45 +99,50 @@ public void initialize(InputSplit genericSplit,
MRJobConfig.INPUT_FILE_MANDATORY_PREFIX); MRJobConfig.INPUT_FILE_MANDATORY_PREFIX);
fileIn = FutureIO.awaitFuture(builder.build()); fileIn = FutureIO.awaitFuture(builder.build());
CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file); try {
if (null!=codec) { CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
isCompressedInput = true; if (null!=codec) {
decompressor = CodecPool.getDecompressor(codec); isCompressedInput = true;
if (codec instanceof SplittableCompressionCodec) { decompressor = CodecPool.getDecompressor(codec);
final SplitCompressionInputStream cIn = if (codec instanceof SplittableCompressionCodec) {
((SplittableCompressionCodec)codec).createInputStream( final SplitCompressionInputStream cIn =
fileIn, decompressor, start, end, ((SplittableCompressionCodec)codec).createInputStream(
SplittableCompressionCodec.READ_MODE.BYBLOCK); fileIn, decompressor, start, end,
in = new CompressedSplitLineReader(cIn, job, SplittableCompressionCodec.READ_MODE.BYBLOCK);
this.recordDelimiterBytes); in = new CompressedSplitLineReader(cIn, job,
start = cIn.getAdjustedStart(); this.recordDelimiterBytes);
end = cIn.getAdjustedEnd(); start = cIn.getAdjustedStart();
filePosition = cIn; end = cIn.getAdjustedEnd();
} else { filePosition = cIn;
if (start != 0) { } else {
// So we have a split that is only part of a file stored using if (start != 0) {
// a Compression codec that cannot be split. // So we have a split that is only part of a file stored using
throw new IOException("Cannot seek in " + // a Compression codec that cannot be split.
codec.getClass().getSimpleName() + " compressed stream"); throw new IOException("Cannot seek in " +
} codec.getClass().getSimpleName() + " compressed stream");
}
in = new SplitLineReader(codec.createInputStream(fileIn, in = new SplitLineReader(codec.createInputStream(fileIn,
decompressor), job, this.recordDelimiterBytes); decompressor), job, this.recordDelimiterBytes);
filePosition = fileIn;
}
} else {
fileIn.seek(start);
in = new UncompressedSplitLineReader(
fileIn, job, this.recordDelimiterBytes, split.getLength());
filePosition = fileIn; filePosition = fileIn;
} }
} else { // If this is not the first split, we always throw away first record
fileIn.seek(start); // because we always (except the last split) read one extra line in
in = new UncompressedSplitLineReader( // next() method.
fileIn, job, this.recordDelimiterBytes, split.getLength()); if (start != 0) {
filePosition = fileIn; start += in.readLine(new Text(), 0, maxBytesToConsume(start));
}
this.pos = start;
} catch (Exception e) {
fileIn.close();
throw e;
} }
// If this is not the first split, we always throw away first record
// because we always (except the last split) read one extra line in
// next() method.
if (start != 0) {
start += in.readLine(new Text(), 0, maxBytesToConsume(start));
}
this.pos = start;
} }