HADOOP-16148. Cleanup LineReader Unit Test.

Contributed by David Mollitor.

Signed-off-by: Steve Loughran <stevel@apache.org>
This commit is contained in:
David Mollitor 2019-03-04 23:08:12 +00:00 committed by Steve Loughran
parent 90c37ac40d
commit 9fcd89ab93
No known key found for this signature in database
GPG Key ID: D22CF846DBB162A0

View File

@ -19,107 +19,107 @@
package org.apache.hadoop.util; package org.apache.hadoop.util;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import org.junit.Assert;
public class TestLineReader { public class TestLineReader {
private LineReader lineReader;
private String TestData;
private String Delimiter;
private Text line;
@Test /**
public void testCustomDelimiter() throws Exception { * TEST_1: The test scenario is the tail of the buffer equals the starting
/* TEST_1 * character/s of delimiter.
* The test scenario is the tail of the buffer
* equals the starting character/s of delimiter
* *
* The Test Data is such that, * The Test Data is such that,
* *
* 1) we will have "</entity>" as delimiter * 1) we will have "&lt;/entity&gt;" as delimiter
* *
* 2) The tail of the current buffer would be "</" * 2) The tail of the current buffer would be "&lt;/" which matches with the
* which matches with the starting character sequence of delimiter. * starting character sequence of delimiter.
* *
* 3) The Head of the next buffer would be "id>" * 3) The Head of the next buffer would be "id&gt;" which does NOT match with
* which does NOT match with the remaining characters of delimiter. * the remaining characters of delimiter.
* *
* 4) Input data would be prefixed by char 'a' * 4) Input data would be prefixed by char 'a' about
* about numberOfCharToFillTheBuffer times. * numberOfCharToFillTheBuffer times. So that, one iteration to buffer the
* So that, one iteration to buffer the input data, * input data, would end at '&lt;/' ie equals starting 2 char of delimiter
* would end at '</' ie equals starting 2 char of delimiter
* *
* 5) For this we would take BufferSize as 64 * 1024; * 5) For this we would take BufferSize as 64 * 1024;
* *
* Check Condition * Check Condition: In the second key value pair, the value should contain
* In the second key value pair, the value should contain * "&lt;/" from currentToken and "id&gt;" from next token
* "</" from currentToken and
* "id>" from next token
*/ */
@Test
public void testCustomDelimiter1() throws Exception {
Delimiter="</entity>"; final String delimiter = "</entity>";
String CurrentBufferTailToken=
"</entity><entity><id>Gelesh</";
// Ending part of Input Data Buffer // Ending part of Input Data Buffer
// It contains '</' ie delimiter character // It contains '</' ie delimiter character
final String currentBufferTailToken = "</entity><entity><id>Gelesh</";
String NextBufferHeadToken=
"id><name>Omathil</name></entity>";
// Supposing the start of next buffer is this // Supposing the start of next buffer is this
final String nextBufferHeadToken = "id><name>Omathil</name></entity>";
String Expected = // Expected must capture from both the buffer, excluding Delimiter
(CurrentBufferTailToken+NextBufferHeadToken) final String expected =
.replace(Delimiter, ""); (currentBufferTailToken + nextBufferHeadToken).replace(delimiter, "");
// Expected ,must capture from both the buffer, excluding Delimiter
String TestPartOfInput = CurrentBufferTailToken+NextBufferHeadToken; final String testPartOfInput = currentBufferTailToken + nextBufferHeadToken;
int BufferSize=64 * 1024; final int bufferSize = 64 * 1024;
int numberOfCharToFillTheBuffer = int numberOfCharToFillTheBuffer =
BufferSize - CurrentBufferTailToken.length(); bufferSize - currentBufferTailToken.length();
StringBuilder fillerString=new StringBuilder();
for (int i=0; i<numberOfCharToFillTheBuffer; i++) {
fillerString.append('a'); // char 'a' as a filler for the test string
}
TestData = fillerString + TestPartOfInput; final char[] fillBuffer = new char[numberOfCharToFillTheBuffer];
lineReader = new LineReader(
new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
line = new Text(); // char 'a' as a filler for the test string
Arrays.fill(fillBuffer, 'a');
final StringBuilder fillerString = new StringBuilder();
final String testData = fillerString + testPartOfInput;
final LineReader lineReader = new LineReader(
new ByteArrayInputStream(testData.getBytes(StandardCharsets.UTF_8)),
delimiter.getBytes(StandardCharsets.UTF_8));
final Text line = new Text();
lineReader.readLine(line); lineReader.readLine(line);
lineReader.close();
Assert.assertEquals(fillerString.toString(), line.toString()); Assert.assertEquals(fillerString.toString(), line.toString());
lineReader.readLine(line); lineReader.readLine(line);
Assert.assertEquals(Expected, line.toString()); Assert.assertEquals(expected, line.toString());
}
/*TEST_2 /**
* The test scenario is such that, * TEST_2: The test scenario is such that, the character/s preceding the
* the character/s preceding the delimiter, * delimiter, equals the starting character/s of delimiter.
* equals the starting character/s of delimiter
*/ */
@Test
public void testCustomDelimiter2() throws Exception {
final String delimiter = "record";
final StringBuilder testStringBuilder = new StringBuilder();
Delimiter = "record"; testStringBuilder.append(delimiter).append("Kerala ");
StringBuilder TestStringBuilder = new StringBuilder(); testStringBuilder.append(delimiter).append("Bangalore");
testStringBuilder.append(delimiter).append(" North Korea");
testStringBuilder.append(delimiter).append(delimiter).append("Guantanamo");
TestStringBuilder.append(Delimiter + "Kerala "); // ~EOF with 're'
TestStringBuilder.append(Delimiter + "Bangalore"); testStringBuilder.append(delimiter + "ecord" + "recor" + "core");
TestStringBuilder.append(Delimiter + " North Korea");
TestStringBuilder.append(Delimiter + Delimiter+
"Guantanamo");
TestStringBuilder.append(Delimiter + "ecord"
+ "recor" + "core"); //~EOF with 're'
TestData=TestStringBuilder.toString(); final String testData = testStringBuilder.toString();
lineReader = new LineReader( final LineReader lineReader = new LineReader(
new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes()); new ByteArrayInputStream(testData.getBytes(StandardCharsets.UTF_8)),
delimiter.getBytes((StandardCharsets.UTF_8)));
final Text line = new Text();
lineReader.readLine(line); lineReader.readLine(line);
Assert.assertEquals("", line.toString()); Assert.assertEquals("", line.toString());
@ -138,19 +138,29 @@ public class TestLineReader {
Assert.assertEquals("Guantanamo", line.toString()); Assert.assertEquals("Guantanamo", line.toString());
lineReader.readLine(line); lineReader.readLine(line);
Assert.assertEquals(("ecord"+"recor"+"core"), line.toString()); Assert.assertEquals(("ecord" + "recor" + "core"), line.toString());
// Test 3 lineReader.close();
// The test scenario is such that, }
// aaaabccc split by aaab
TestData = "aaaabccc"; /**
Delimiter = "aaab"; * Test 3: The test scenario is such that, aaaabccc split by aaab.
lineReader = new LineReader( */
new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes()); @Test
public void testCustomDelimiter3() throws Exception {
final String testData = "aaaabccc";
final String delimiter = "aaab";
final LineReader lineReader = new LineReader(
new ByteArrayInputStream(testData.getBytes(StandardCharsets.UTF_8)),
delimiter.getBytes(StandardCharsets.UTF_8));
final Text line = new Text();
lineReader.readLine(line); lineReader.readLine(line);
Assert.assertEquals("a", line.toString()); Assert.assertEquals("a", line.toString());
lineReader.readLine(line); lineReader.readLine(line);
Assert.assertEquals("ccc", line.toString()); Assert.assertEquals("ccc", line.toString());
lineReader.close();
} }
} }