MAPREDUCE-6481. LineRecordReader may give incomplete record and wrong position/key information for uncompressed input sometimes. Contributed by Zhihai Xu
This commit is contained in:
parent
6c6e734f0b
commit
58d1a02b8d
@ -303,7 +303,10 @@ private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume)
|
|||||||
startPosn = bufferPosn = 0;
|
startPosn = bufferPosn = 0;
|
||||||
bufferLength = fillBuffer(in, buffer, ambiguousByteCount > 0);
|
bufferLength = fillBuffer(in, buffer, ambiguousByteCount > 0);
|
||||||
if (bufferLength <= 0) {
|
if (bufferLength <= 0) {
|
||||||
str.append(recordDelimiterBytes, 0, ambiguousByteCount);
|
if (ambiguousByteCount > 0) {
|
||||||
|
str.append(recordDelimiterBytes, 0, ambiguousByteCount);
|
||||||
|
bytesConsumed += ambiguousByteCount;
|
||||||
|
}
|
||||||
break; // EOF
|
break; // EOF
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -325,13 +328,13 @@ private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume)
|
|||||||
if (appendLength > maxLineLength - txtLength) {
|
if (appendLength > maxLineLength - txtLength) {
|
||||||
appendLength = maxLineLength - txtLength;
|
appendLength = maxLineLength - txtLength;
|
||||||
}
|
}
|
||||||
|
bytesConsumed += ambiguousByteCount;
|
||||||
|
if (appendLength >= 0 && ambiguousByteCount > 0) {
|
||||||
|
//appending the ambiguous characters (refer case 2.2)
|
||||||
|
str.append(recordDelimiterBytes, 0, ambiguousByteCount);
|
||||||
|
ambiguousByteCount = 0;
|
||||||
|
}
|
||||||
if (appendLength > 0) {
|
if (appendLength > 0) {
|
||||||
if (ambiguousByteCount > 0) {
|
|
||||||
str.append(recordDelimiterBytes, 0, ambiguousByteCount);
|
|
||||||
//appending the ambiguous characters (refer case 2.2)
|
|
||||||
bytesConsumed += ambiguousByteCount;
|
|
||||||
ambiguousByteCount=0;
|
|
||||||
}
|
|
||||||
str.append(buffer, startPosn, appendLength);
|
str.append(buffer, startPosn, appendLength);
|
||||||
txtLength += appendLength;
|
txtLength += appendLength;
|
||||||
}
|
}
|
||||||
|
@ -566,6 +566,10 @@ Release 2.8.0 - UNRELEASED
|
|||||||
MAPREDUCE-6452. NPE when intermediate encrypt enabled for LocalRunner.
|
MAPREDUCE-6452. NPE when intermediate encrypt enabled for LocalRunner.
|
||||||
(Zhihai Xu)
|
(Zhihai Xu)
|
||||||
|
|
||||||
|
MAPREDUCE-6481. LineRecordReader may give incomplete record and wrong
|
||||||
|
position/key information for uncompressed input sometimes. (Zhihai Xu via
|
||||||
|
jlowe)
|
||||||
|
|
||||||
Release 2.7.2 - UNRELEASED
|
Release 2.7.2 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
@ -40,8 +40,6 @@ public class UncompressedSplitLineReader extends SplitLineReader {
|
|||||||
private long totalBytesRead = 0;
|
private long totalBytesRead = 0;
|
||||||
private boolean finished = false;
|
private boolean finished = false;
|
||||||
private boolean usingCRLF;
|
private boolean usingCRLF;
|
||||||
private int unusedBytes = 0;
|
|
||||||
private int lastBytesRead = 0;
|
|
||||||
|
|
||||||
public UncompressedSplitLineReader(FSDataInputStream in, Configuration conf,
|
public UncompressedSplitLineReader(FSDataInputStream in, Configuration conf,
|
||||||
byte[] recordDelimiterBytes, long splitLength) throws IOException {
|
byte[] recordDelimiterBytes, long splitLength) throws IOException {
|
||||||
@ -59,7 +57,6 @@ protected int fillBuffer(InputStream in, byte[] buffer, boolean inDelimiter)
|
|||||||
(int)(splitLength - totalBytesRead));
|
(int)(splitLength - totalBytesRead));
|
||||||
}
|
}
|
||||||
int bytesRead = in.read(buffer, 0, maxBytesToRead);
|
int bytesRead = in.read(buffer, 0, maxBytesToRead);
|
||||||
lastBytesRead = bytesRead;
|
|
||||||
|
|
||||||
// If the split ended in the middle of a record delimiter then we need
|
// If the split ended in the middle of a record delimiter then we need
|
||||||
// to read one additional record, as the consumer of the next split will
|
// to read one additional record, as the consumer of the next split will
|
||||||
@ -83,39 +80,17 @@ protected int fillBuffer(InputStream in, byte[] buffer, boolean inDelimiter)
|
|||||||
@Override
|
@Override
|
||||||
public int readLine(Text str, int maxLineLength, int maxBytesToConsume)
|
public int readLine(Text str, int maxLineLength, int maxBytesToConsume)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
long bytesRead = 0;
|
int bytesRead = 0;
|
||||||
if (!finished) {
|
if (!finished) {
|
||||||
// only allow at most one more record to be read after the stream
|
// only allow at most one more record to be read after the stream
|
||||||
// reports the split ended
|
// reports the split ended
|
||||||
if (totalBytesRead > splitLength) {
|
if (totalBytesRead > splitLength) {
|
||||||
finished = true;
|
finished = true;
|
||||||
}
|
}
|
||||||
bytesRead = totalBytesRead;
|
|
||||||
int bytesConsumed = super.readLine(str, maxLineLength, maxBytesToConsume);
|
|
||||||
bytesRead = totalBytesRead - bytesRead;
|
|
||||||
|
|
||||||
// No records left.
|
bytesRead = super.readLine(str, maxLineLength, maxBytesToConsume);
|
||||||
if (bytesConsumed == 0 && bytesRead == 0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int bufferSize = getBufferSize();
|
|
||||||
|
|
||||||
// Add the remaining buffer size not used for the last call
|
|
||||||
// of fillBuffer method.
|
|
||||||
if (lastBytesRead <= 0) {
|
|
||||||
bytesRead += bufferSize;
|
|
||||||
} else if (bytesRead > 0) {
|
|
||||||
bytesRead += bufferSize - lastBytesRead;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Adjust the size of the buffer not used for this record.
|
|
||||||
// The size is carried over for the next calculation.
|
|
||||||
bytesRead += unusedBytes;
|
|
||||||
unusedBytes = bufferSize - getBufferPosn();
|
|
||||||
bytesRead -= unusedBytes;
|
|
||||||
}
|
}
|
||||||
return (int) bytesRead;
|
return bytesRead;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
package org.apache.hadoop.mapred;
|
package org.apache.hadoop.mapred;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
@ -357,4 +358,141 @@ public void testUncompressedInputContainingCRLF() throws Exception {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUncompressedInputCustomDelimiterPosValue()
|
||||||
|
throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
String inputData = "1234567890ab12ab345";
|
||||||
|
Path inputFile = createInputFile(conf, inputData);
|
||||||
|
conf.setInt("io.file.buffer.size", 10);
|
||||||
|
conf.setInt(org.apache.hadoop.mapreduce.lib.input.
|
||||||
|
LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
|
||||||
|
String delimiter = "ab";
|
||||||
|
byte[] recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
|
||||||
|
FileSplit split = new FileSplit(inputFile, 0, 15, (String[])null);
|
||||||
|
LineRecordReader reader = new LineRecordReader(conf, split,
|
||||||
|
recordDelimiterBytes);
|
||||||
|
LongWritable key = new LongWritable();
|
||||||
|
Text value = new Text();
|
||||||
|
reader.next(key, value);
|
||||||
|
// Get first record:"1234567890"
|
||||||
|
assertEquals(10, value.getLength());
|
||||||
|
// Position should be 12 right after "1234567890ab"
|
||||||
|
assertEquals(12, reader.getPos());
|
||||||
|
reader.next(key, value);
|
||||||
|
// Get second record:"12"
|
||||||
|
assertEquals(2, value.getLength());
|
||||||
|
// Position should be 16 right after "1234567890ab12ab"
|
||||||
|
assertEquals(16, reader.getPos());
|
||||||
|
reader.next(key, value);
|
||||||
|
// Get third record:"345"
|
||||||
|
assertEquals(3, value.getLength());
|
||||||
|
// Position should be 19 right after "1234567890ab12ab345"
|
||||||
|
assertEquals(19, reader.getPos());
|
||||||
|
assertFalse(reader.next(key, value));
|
||||||
|
assertEquals(19, reader.getPos());
|
||||||
|
|
||||||
|
split = new FileSplit(inputFile, 15, 4, (String[])null);
|
||||||
|
reader = new LineRecordReader(conf, split, recordDelimiterBytes);
|
||||||
|
// No record is in the second split because the second split dropped
|
||||||
|
// the first record, which was already reported by the first split.
|
||||||
|
// The position should be 19 right after "1234567890ab12ab345"
|
||||||
|
assertEquals(19, reader.getPos());
|
||||||
|
assertFalse(reader.next(key, value));
|
||||||
|
assertEquals(19, reader.getPos());
|
||||||
|
|
||||||
|
inputData = "123456789aab";
|
||||||
|
inputFile = createInputFile(conf, inputData);
|
||||||
|
split = new FileSplit(inputFile, 0, 12, (String[])null);
|
||||||
|
reader = new LineRecordReader(conf, split, recordDelimiterBytes);
|
||||||
|
reader.next(key, value);
|
||||||
|
// Get first record:"123456789a"
|
||||||
|
assertEquals(10, value.getLength());
|
||||||
|
// Position should be 12 right after "123456789aab"
|
||||||
|
assertEquals(12, reader.getPos());
|
||||||
|
assertFalse(reader.next(key, value));
|
||||||
|
assertEquals(12, reader.getPos());
|
||||||
|
|
||||||
|
inputData = "123456789a";
|
||||||
|
inputFile = createInputFile(conf, inputData);
|
||||||
|
split = new FileSplit(inputFile, 0, 10, (String[])null);
|
||||||
|
reader = new LineRecordReader(conf, split, recordDelimiterBytes);
|
||||||
|
reader.next(key, value);
|
||||||
|
// Get first record:"123456789a"
|
||||||
|
assertEquals(10, value.getLength());
|
||||||
|
// Position should be 10 right after "123456789a"
|
||||||
|
assertEquals(10, reader.getPos());
|
||||||
|
assertFalse(reader.next(key, value));
|
||||||
|
assertEquals(10, reader.getPos());
|
||||||
|
|
||||||
|
inputData = "123456789ab";
|
||||||
|
inputFile = createInputFile(conf, inputData);
|
||||||
|
split = new FileSplit(inputFile, 0, 11, (String[])null);
|
||||||
|
reader = new LineRecordReader(conf, split, recordDelimiterBytes);
|
||||||
|
reader.next(key, value);
|
||||||
|
// Get first record:"123456789"
|
||||||
|
assertEquals(9, value.getLength());
|
||||||
|
// Position should be 11 right after "123456789ab"
|
||||||
|
assertEquals(11, reader.getPos());
|
||||||
|
assertFalse(reader.next(key, value));
|
||||||
|
assertEquals(11, reader.getPos());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUncompressedInputDefaultDelimiterPosValue()
|
||||||
|
throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
String inputData = "1234567890\r\n12\r\n345";
|
||||||
|
Path inputFile = createInputFile(conf, inputData);
|
||||||
|
conf.setInt("io.file.buffer.size", 10);
|
||||||
|
conf.setInt(org.apache.hadoop.mapreduce.lib.input.
|
||||||
|
LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
|
||||||
|
FileSplit split = new FileSplit(inputFile, 0, 15, (String[])null);
|
||||||
|
LineRecordReader reader = new LineRecordReader(conf, split,
|
||||||
|
null);
|
||||||
|
LongWritable key = new LongWritable();
|
||||||
|
Text value = new Text();
|
||||||
|
reader.next(key, value);
|
||||||
|
// Get first record:"1234567890"
|
||||||
|
assertEquals(10, value.getLength());
|
||||||
|
// Position should be 12 right after "1234567890\r\n"
|
||||||
|
assertEquals(12, reader.getPos());
|
||||||
|
reader.next(key, value);
|
||||||
|
// Get second record:"12"
|
||||||
|
assertEquals(2, value.getLength());
|
||||||
|
// Position should be 16 right after "1234567890\r\n12\r\n"
|
||||||
|
assertEquals(16, reader.getPos());
|
||||||
|
assertFalse(reader.next(key, value));
|
||||||
|
|
||||||
|
split = new FileSplit(inputFile, 15, 4, (String[])null);
|
||||||
|
reader = new LineRecordReader(conf, split, null);
|
||||||
|
// The second split dropped the first record "\n"
|
||||||
|
// The position should be 16 right after "1234567890\r\n12\r\n"
|
||||||
|
assertEquals(16, reader.getPos());
|
||||||
|
reader.next(key, value);
|
||||||
|
// Get third record:"345"
|
||||||
|
assertEquals(3, value.getLength());
|
||||||
|
// Position should be 19 right after "1234567890\r\n12\r\n345"
|
||||||
|
assertEquals(19, reader.getPos());
|
||||||
|
assertFalse(reader.next(key, value));
|
||||||
|
assertEquals(19, reader.getPos());
|
||||||
|
|
||||||
|
inputData = "123456789\r\r\n";
|
||||||
|
inputFile = createInputFile(conf, inputData);
|
||||||
|
split = new FileSplit(inputFile, 0, 12, (String[])null);
|
||||||
|
reader = new LineRecordReader(conf, split, null);
|
||||||
|
reader.next(key, value);
|
||||||
|
// Get first record:"123456789"
|
||||||
|
assertEquals(9, value.getLength());
|
||||||
|
// Position should be 10 right after "123456789\r"
|
||||||
|
assertEquals(10, reader.getPos());
|
||||||
|
reader.next(key, value);
|
||||||
|
// Get second record:""
|
||||||
|
assertEquals(0, value.getLength());
|
||||||
|
// Position should be 12 right after "123456789\r\r\n"
|
||||||
|
assertEquals(12, reader.getPos());
|
||||||
|
assertFalse(reader.next(key, value));
|
||||||
|
assertEquals(12, reader.getPos());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
package org.apache.hadoop.mapreduce.lib.input;
|
package org.apache.hadoop.mapreduce.lib.input;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
@ -37,6 +38,8 @@
|
|||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.io.LongWritable;
|
||||||
|
import org.apache.hadoop.io.Text;
|
||||||
import org.apache.hadoop.io.compress.BZip2Codec;
|
import org.apache.hadoop.io.compress.BZip2Codec;
|
||||||
import org.apache.hadoop.io.compress.CodecPool;
|
import org.apache.hadoop.io.compress.CodecPool;
|
||||||
import org.apache.hadoop.io.compress.Decompressor;
|
import org.apache.hadoop.io.compress.Decompressor;
|
||||||
@ -341,4 +344,162 @@ public void testUncompressedInputContainingCRLF() throws Exception {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUncompressedInputCustomDelimiterPosValue()
|
||||||
|
throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
String inputData = "1234567890ab12ab345";
|
||||||
|
Path inputFile = createInputFile(conf, inputData);
|
||||||
|
conf.setInt("io.file.buffer.size", 10);
|
||||||
|
conf.setInt(org.apache.hadoop.mapreduce.lib.input.
|
||||||
|
LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
|
||||||
|
String delimiter = "ab";
|
||||||
|
byte[] recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
|
||||||
|
FileSplit split = new FileSplit(inputFile, 0, 15, (String[])null);
|
||||||
|
TaskAttemptContext context = new TaskAttemptContextImpl(conf,
|
||||||
|
new TaskAttemptID());
|
||||||
|
LineRecordReader reader = new LineRecordReader(recordDelimiterBytes);
|
||||||
|
reader.initialize(split, context);
|
||||||
|
LongWritable key;
|
||||||
|
Text value;
|
||||||
|
reader.nextKeyValue();
|
||||||
|
key = reader.getCurrentKey();
|
||||||
|
value = reader.getCurrentValue();
|
||||||
|
// Get first record:"1234567890"
|
||||||
|
assertEquals(10, value.getLength());
|
||||||
|
assertEquals(0, key.get());
|
||||||
|
reader.nextKeyValue();
|
||||||
|
// Get second record:"12"
|
||||||
|
assertEquals(2, value.getLength());
|
||||||
|
// Key should be 12 right after "1234567890ab"
|
||||||
|
assertEquals(12, key.get());
|
||||||
|
reader.nextKeyValue();
|
||||||
|
// Get third record:"345"
|
||||||
|
assertEquals(3, value.getLength());
|
||||||
|
// Key should be 16 right after "1234567890ab12ab"
|
||||||
|
assertEquals(16, key.get());
|
||||||
|
assertFalse(reader.nextKeyValue());
|
||||||
|
// Key should be 19 right after "1234567890ab12ab345"
|
||||||
|
assertEquals(19, key.get());
|
||||||
|
|
||||||
|
split = new FileSplit(inputFile, 15, 4, (String[])null);
|
||||||
|
reader = new LineRecordReader(recordDelimiterBytes);
|
||||||
|
reader.initialize(split, context);
|
||||||
|
// No record is in the second split because the second split dropped
|
||||||
|
// the first record, which was already reported by the first split.
|
||||||
|
assertFalse(reader.nextKeyValue());
|
||||||
|
|
||||||
|
inputData = "123456789aab";
|
||||||
|
inputFile = createInputFile(conf, inputData);
|
||||||
|
split = new FileSplit(inputFile, 0, 12, (String[])null);
|
||||||
|
reader = new LineRecordReader(recordDelimiterBytes);
|
||||||
|
reader.initialize(split, context);
|
||||||
|
reader.nextKeyValue();
|
||||||
|
key = reader.getCurrentKey();
|
||||||
|
value = reader.getCurrentValue();
|
||||||
|
// Get first record:"123456789a"
|
||||||
|
assertEquals(10, value.getLength());
|
||||||
|
assertEquals(0, key.get());
|
||||||
|
assertFalse(reader.nextKeyValue());
|
||||||
|
// Key should be 12 right after "123456789aab"
|
||||||
|
assertEquals(12, key.get());
|
||||||
|
|
||||||
|
inputData = "123456789a";
|
||||||
|
inputFile = createInputFile(conf, inputData);
|
||||||
|
split = new FileSplit(inputFile, 0, 10, (String[])null);
|
||||||
|
reader = new LineRecordReader(recordDelimiterBytes);
|
||||||
|
reader.initialize(split, context);
|
||||||
|
reader.nextKeyValue();
|
||||||
|
key = reader.getCurrentKey();
|
||||||
|
value = reader.getCurrentValue();
|
||||||
|
// Get first record:"123456789a"
|
||||||
|
assertEquals(10, value.getLength());
|
||||||
|
assertEquals(0, key.get());
|
||||||
|
assertFalse(reader.nextKeyValue());
|
||||||
|
// Key should be 10 right after "123456789a"
|
||||||
|
assertEquals(10, key.get());
|
||||||
|
|
||||||
|
inputData = "123456789ab";
|
||||||
|
inputFile = createInputFile(conf, inputData);
|
||||||
|
split = new FileSplit(inputFile, 0, 11, (String[])null);
|
||||||
|
reader = new LineRecordReader(recordDelimiterBytes);
|
||||||
|
reader.initialize(split, context);
|
||||||
|
reader.nextKeyValue();
|
||||||
|
key = reader.getCurrentKey();
|
||||||
|
value = reader.getCurrentValue();
|
||||||
|
// Get first record:"123456789"
|
||||||
|
assertEquals(9, value.getLength());
|
||||||
|
assertEquals(0, key.get());
|
||||||
|
assertFalse(reader.nextKeyValue());
|
||||||
|
// Key should be 11 right after "123456789ab"
|
||||||
|
assertEquals(11, key.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUncompressedInputDefaultDelimiterPosValue()
|
||||||
|
throws Exception {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
String inputData = "1234567890\r\n12\r\n345";
|
||||||
|
Path inputFile = createInputFile(conf, inputData);
|
||||||
|
conf.setInt("io.file.buffer.size", 10);
|
||||||
|
conf.setInt(org.apache.hadoop.mapreduce.lib.input.
|
||||||
|
LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
|
||||||
|
FileSplit split = new FileSplit(inputFile, 0, 15, (String[])null);
|
||||||
|
TaskAttemptContext context = new TaskAttemptContextImpl(conf,
|
||||||
|
new TaskAttemptID());
|
||||||
|
LineRecordReader reader = new LineRecordReader(null);
|
||||||
|
reader.initialize(split, context);
|
||||||
|
LongWritable key;
|
||||||
|
Text value;
|
||||||
|
reader.nextKeyValue();
|
||||||
|
key = reader.getCurrentKey();
|
||||||
|
value = reader.getCurrentValue();
|
||||||
|
// Get first record:"1234567890"
|
||||||
|
assertEquals(10, value.getLength());
|
||||||
|
assertEquals(0, key.get());
|
||||||
|
reader.nextKeyValue();
|
||||||
|
// Get second record:"12"
|
||||||
|
assertEquals(2, value.getLength());
|
||||||
|
// Key should be 12 right after "1234567890\r\n"
|
||||||
|
assertEquals(12, key.get());
|
||||||
|
assertFalse(reader.nextKeyValue());
|
||||||
|
// Key should be 16 right after "1234567890\r\n12\r\n"
|
||||||
|
assertEquals(16, key.get());
|
||||||
|
|
||||||
|
split = new FileSplit(inputFile, 15, 4, (String[])null);
|
||||||
|
reader = new LineRecordReader(null);
|
||||||
|
reader.initialize(split, context);
|
||||||
|
// The second split dropped the first record "\n"
|
||||||
|
reader.nextKeyValue();
|
||||||
|
key = reader.getCurrentKey();
|
||||||
|
value = reader.getCurrentValue();
|
||||||
|
// Get third record:"345"
|
||||||
|
assertEquals(3, value.getLength());
|
||||||
|
// Key should be 16 right after "1234567890\r\n12\r\n"
|
||||||
|
assertEquals(16, key.get());
|
||||||
|
assertFalse(reader.nextKeyValue());
|
||||||
|
// Key should be 19 right after "1234567890\r\n12\r\n345"
|
||||||
|
assertEquals(19, key.get());
|
||||||
|
|
||||||
|
inputData = "123456789\r\r\n";
|
||||||
|
inputFile = createInputFile(conf, inputData);
|
||||||
|
split = new FileSplit(inputFile, 0, 12, (String[])null);
|
||||||
|
reader = new LineRecordReader(null);
|
||||||
|
reader.initialize(split, context);
|
||||||
|
reader.nextKeyValue();
|
||||||
|
key = reader.getCurrentKey();
|
||||||
|
value = reader.getCurrentValue();
|
||||||
|
// Get first record:"123456789"
|
||||||
|
assertEquals(9, value.getLength());
|
||||||
|
assertEquals(0, key.get());
|
||||||
|
reader.nextKeyValue();
|
||||||
|
// Get second record:""
|
||||||
|
assertEquals(0, value.getLength());
|
||||||
|
// Key should be 10 right after "123456789\r"
|
||||||
|
assertEquals(10, key.get());
|
||||||
|
assertFalse(reader.nextKeyValue());
|
||||||
|
// Key should be 12 right after "123456789\r\r\n"
|
||||||
|
assertEquals(12, key.get());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user