MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader and IndexOutOfBoundsException. Contributed by Junping Du.

This commit is contained in:
Varun Vasudev 2016-02-23 13:05:18 +05:30
parent 140cb5d745
commit c6f2d761d5
3 changed files with 67 additions and 2 deletions

View File

@ -724,6 +724,9 @@ Release 2.8.0 - UNRELEASED
MAPREDUCE-6616. Fail to create jobhistory file if there are some multibyte MAPREDUCE-6616. Fail to create jobhistory file if there are some multibyte
characters in the job name. (Kousuke Saruta via aajisaka) characters in the job name. (Kousuke Saruta via aajisaka)
MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
and IndexOutOfBoundsException. (Junping Du via vvasudev)
Release 2.7.3 - UNRELEASED Release 2.7.3 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES
@ -763,6 +766,9 @@ Release 2.7.3 - UNRELEASED
MAPREDUCE-6191. Improve clearing stale state of Java serialization MAPREDUCE-6191. Improve clearing stale state of Java serialization
testcase. (Sam Liu via Eric Yang) testcase. (Sam Liu via Eric Yang)
MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
and IndexOutOfBoundsException. (Junping Du via vvasudev)
Release 2.7.2 - 2016-01-25 Release 2.7.2 - 2016-01-25
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES
@ -1056,6 +1062,9 @@ Release 2.6.5 - UNRELEASED
MAPREDUCE-6191. Improve clearing stale state of Java serialization MAPREDUCE-6191. Improve clearing stale state of Java serialization
testcase. (Sam Liu via Eric Yang) testcase. (Sam Liu via Eric Yang)
MAPREDUCE-6635. Unsafe long to int conversion in UncompressedSplitLineReader
and IndexOutOfBoundsException. (Junping Du via vvasudev)
Release 2.6.4 - 2016-02-11 Release 2.6.4 - 2016-02-11
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -53,8 +53,11 @@ protected int fillBuffer(InputStream in, byte[] buffer, boolean inDelimiter)
throws IOException { throws IOException {
int maxBytesToRead = buffer.length; int maxBytesToRead = buffer.length;
if (totalBytesRead < splitLength) { if (totalBytesRead < splitLength) {
maxBytesToRead = Math.min(maxBytesToRead, long leftBytesForSplit = splitLength - totalBytesRead;
(int)(splitLength - totalBytesRead)); // check if leftBytesForSplit exceed Integer.MAX_VALUE
if (leftBytesForSplit <= Integer.MAX_VALUE) {
maxBytesToRead = Math.min(maxBytesToRead, (int)leftBytesForSplit);
}
} }
int bytesRead = in.read(buffer, 0, maxBytesToRead); int bytesRead = in.read(buffer, 0, maxBytesToRead);

View File

@ -110,6 +110,43 @@ private void testSplitRecordsForFile(Configuration conf,
numRecordsNoSplits, numRecordsFirstSplit + numRecordsRemainingSplits); numRecordsNoSplits, numRecordsFirstSplit + numRecordsRemainingSplits);
} }
/**
 * Checks that reading a file through a single split whose declared length is
 * larger than the file yields the same number of records as reading it
 * through a split of exactly {@code testFileSize} bytes.
 *
 * @param conf configuration handed to the readers; its max line length is
 *          overwritten with {@code Integer.MAX_VALUE}, and
 *          {@code textinputformat.record.delimiter} is read from it to pick
 *          an optional custom record delimiter
 * @param firstSplitLength declared length of the oversized split; must be
 *          greater than {@code testFileSize}
 * @param testFileSize length used for the reference (unsplit) read
 * @param testFilePath file to read
 * @throws IOException if constructing, reading or closing a reader fails
 */
private void testLargeSplitRecordForFile(Configuration conf,
    long firstSplitLength, long testFileSize, Path testFilePath)
    throws IOException {
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  assertTrue("unexpected firstSplitLength:" + firstSplitLength,
      testFileSize < firstSplitLength);
  String delimiter = conf.get("textinputformat.record.delimiter");
  byte[] recordDelimiterBytes = null;
  if (null != delimiter) {
    recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
  }
  LongWritable key = new LongWritable();
  Text value = new Text();

  // read the data without splitting to count the records
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
      (String[])null);
  LineRecordReader reader = new LineRecordReader(conf, split,
      recordDelimiterBytes);
  int numRecordsNoSplits = 0;
  try {
    while (reader.next(key, value)) {
      ++numRecordsNoSplits;
    }
  } finally {
    // close even when next() throws so the reader is not leaked
    reader.close();
  }

  // count the records in the first (oversized) split
  split = new FileSplit(testFilePath, 0, firstSplitLength, (String[])null);
  reader = new LineRecordReader(conf, split, recordDelimiterBytes);
  int numRecordsFirstSplit = 0;
  try {
    while (reader.next(key, value)) {
      ++numRecordsFirstSplit;
    }
  } finally {
    // close even when next() throws so the reader is not leaked
    reader.close();
  }

  assertEquals("Unexpected number of records in split",
      numRecordsNoSplits, numRecordsFirstSplit);
}
@Test @Test
public void testBzip2SplitEndsAtCR() throws IOException { public void testBzip2SplitEndsAtCR() throws IOException {
// the test data contains a carriage-return at the end of the first // the test data contains a carriage-return at the end of the first
@ -331,6 +368,22 @@ private Path createInputFile(Configuration conf, String data)
return file; return file;
} }
/**
 * Reads a small uncompressed input through a split whose declared length is
 * one more than {@code Integer.MAX_VALUE}, repeating the check for every
 * {@code io.file.buffer.size} from 1 up to the length of the input string.
 */
@Test
public void testUncompressedInputWithLargeSplitSize() throws Exception {
  Configuration conf = new Configuration();
  // records are separated by a one-character delimiter
  final String data = "abcde +fghij+ klmno+pqrst+uvwxyz";
  final Path file = createInputFile(conf, data);
  conf.set("textinputformat.record.delimiter", "+");
  // declared split length that does not fit in an int
  final long oversizedSplitLength = Integer.MAX_VALUE + 1L;
  final int dataLength = data.length();
  int bufferSize = 1;
  while (bufferSize <= dataLength) {
    conf.setInt("io.file.buffer.size", bufferSize);
    testLargeSplitRecordForFile(conf, oversizedSplitLength, dataLength, file);
    bufferSize++;
  }
}
@Test @Test
public void testUncompressedInput() throws Exception { public void testUncompressedInput() throws Exception {
Configuration conf = new Configuration(); Configuration conf = new Configuration();