HADOOP-19035. CrcUtil/CrcComposer should not throw IOException for non-IO. (#6443)

This commit is contained in:
Tsz-Wo Nicholas Sze 2024-01-25 10:35:32 -08:00 committed by GitHub
parent ac471d7daa
commit da34ecdb83
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 67 additions and 98 deletions

View File

@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -46,7 +46,7 @@ public class CrcComposer {
private int curCompositeCrc = 0;
private long curPositionInStripe = 0;
private ByteArrayOutputStream digestOut = new ByteArrayOutputStream();
private final ByteArrayOutputStream digestOut = new ByteArrayOutputStream();
/**
* Returns a CrcComposer which will collapse all ingested CRCs into a single
@ -54,12 +54,10 @@ public class CrcComposer {
*
* @param type type.
* @param bytesPerCrcHint bytesPerCrcHint.
* @throws IOException raised on errors performing I/O.
* @return a CrcComposer which will collapse all ingested CRCs into a single value.
*/
public static CrcComposer newCrcComposer(
DataChecksum.Type type, long bytesPerCrcHint)
throws IOException {
DataChecksum.Type type, long bytesPerCrcHint) {
return newStripedCrcComposer(type, bytesPerCrcHint, Long.MAX_VALUE);
}
@ -78,11 +76,9 @@ public static CrcComposer newCrcComposer(
* @param stripeLength stripeLength.
* @return a CrcComposer which will collapse CRCs for every combined.
* underlying data size which aligns with the specified stripe boundary.
* @throws IOException raised on errors performing I/O.
*/
public static CrcComposer newStripedCrcComposer(
DataChecksum.Type type, long bytesPerCrcHint, long stripeLength)
throws IOException {
DataChecksum.Type type, long bytesPerCrcHint, long stripeLength) {
int polynomial = DataChecksum.getCrcPolynomialForType(type);
return new CrcComposer(
polynomial,
@ -118,13 +114,10 @@ public static CrcComposer newStripedCrcComposer(
* @param offset offset.
* @param length must be a multiple of the expected byte-size of a CRC.
* @param bytesPerCrc bytesPerCrc.
* @throws IOException raised on errors performing I/O.
*/
public void update(
byte[] crcBuffer, int offset, int length, long bytesPerCrc)
throws IOException {
public void update(byte[] crcBuffer, int offset, int length, long bytesPerCrc) {
if (length % CRC_SIZE_BYTES != 0) {
throw new IOException(String.format(
throw new IllegalArgumentException(String.format(
"Trying to update CRC from byte array with length '%d' at offset "
+ "'%d' which is not a multiple of %d!",
length, offset, CRC_SIZE_BYTES));
@ -162,9 +155,8 @@ public void update(
*
* @param crcB crcB.
* @param bytesPerCrc bytesPerCrc.
* @throws IOException raised on errors performing I/O.
*/
public void update(int crcB, long bytesPerCrc) throws IOException {
public void update(int crcB, long bytesPerCrc) {
if (curCompositeCrc == 0) {
curCompositeCrc = crcB;
} else if (bytesPerCrc == bytesPerCrcHint) {
@ -178,7 +170,7 @@ public void update(int crcB, long bytesPerCrc) throws IOException {
curPositionInStripe += bytesPerCrc;
if (curPositionInStripe > stripeLength) {
throw new IOException(String.format(
throw new IllegalStateException(String.format(
"Current position in stripe '%d' after advancing by bytesPerCrc '%d' "
+ "exceeds stripeLength '%d' without stripe alignment.",
curPositionInStripe, bytesPerCrc, stripeLength));

View File

@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -21,7 +21,6 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import java.io.IOException;
import java.util.Arrays;
/**
@ -112,15 +111,7 @@ public static int compose(int crcA, int crcB, long lengthB, int mod) {
*/
public static byte[] intToBytes(int value) {
byte[] buf = new byte[4];
try {
writeInt(buf, 0, value);
} catch (IOException ioe) {
// Since this should only be able to occur from code bugs within this
// class rather than user input, we throw as a RuntimeException
// rather than requiring this method to declare throwing IOException
// for something the caller can't control.
throw new RuntimeException(ioe);
}
return buf;
}
@ -132,16 +123,14 @@ public static byte[] intToBytes(int value) {
* @param buf buf size.
* @param offset offset.
* @param value value.
* @throws IOException raised on errors performing I/O.
*/
public static void writeInt(byte[] buf, int offset, int value)
throws IOException {
public static void writeInt(byte[] buf, int offset, int value) {
if (offset + 4 > buf.length) {
throw new IOException(String.format(
throw new ArrayIndexOutOfBoundsException(String.format(
"writeInt out of bounds: buf.length=%d, offset=%d",
buf.length, offset));
}
buf[offset + 0] = (byte)((value >>> 24) & 0xff);
buf[offset ] = (byte)((value >>> 24) & 0xff);
buf[offset + 1] = (byte)((value >>> 16) & 0xff);
buf[offset + 2] = (byte)((value >>> 8) & 0xff);
buf[offset + 3] = (byte)(value & 0xff);
@ -154,20 +143,17 @@ public static void writeInt(byte[] buf, int offset, int value)
* @param offset offset.
* @param buf buf.
* @return int.
* @throws IOException raised on errors performing I/O.
*/
public static int readInt(byte[] buf, int offset)
throws IOException {
public static int readInt(byte[] buf, int offset) {
if (offset + 4 > buf.length) {
throw new IOException(String.format(
throw new ArrayIndexOutOfBoundsException(String.format(
"readInt out of bounds: buf.length=%d, offset=%d",
buf.length, offset));
}
int value = ((buf[offset + 0] & 0xff) << 24) |
return ((buf[offset ] & 0xff) << 24) |
((buf[offset + 1] & 0xff) << 16) |
((buf[offset + 2] & 0xff) << 8) |
((buf[offset + 3] & 0xff));
return value;
}
/**
@ -176,13 +162,11 @@ public static int readInt(byte[] buf, int offset)
* formatted value.
*
* @param bytes bytes.
* @throws IOException raised on errors performing I/O.
* @return a list of hex formatted values.
*/
public static String toSingleCrcString(final byte[] bytes)
throws IOException {
public static String toSingleCrcString(final byte[] bytes) {
if (bytes.length != 4) {
throw new IOException((String.format(
throw new IllegalArgumentException((String.format(
"Unexpected byte[] length '%d' for single CRC. Contents: %s",
bytes.length, Arrays.toString(bytes))));
}
@ -195,13 +179,11 @@ public static String toSingleCrcString(final byte[] bytes)
* hex formatted values.
*
* @param bytes bytes.
* @throws IOException raised on errors performing I/O.
* @return a list of hex formatted values.
*/
public static String toMultiCrcString(final byte[] bytes)
throws IOException {
public static String toMultiCrcString(final byte[] bytes) {
if (bytes.length % 4 != 0) {
throw new IOException((String.format(
throw new IllegalArgumentException((String.format(
"Unexpected byte[] length '%d' not divisible by 4. Contents: %s",
bytes.length, Arrays.toString(bytes))));
}

View File

@ -117,17 +117,15 @@ static Checksum newCrc32C() {
* @param type type.
* @return the int representation of the polynomial associated with the
* CRC {@code type}, suitable for use with further CRC arithmetic.
* @throws IOException if there is no CRC polynomial applicable
* to the given {@code type}.
*/
public static int getCrcPolynomialForType(Type type) throws IOException {
public static int getCrcPolynomialForType(Type type) {
switch (type) {
case CRC32:
return CrcUtil.GZIP_POLYNOMIAL;
case CRC32C:
return CrcUtil.CASTAGNOLI_POLYNOMIAL;
default:
throw new IOException(
throw new IllegalArgumentException(
"No CRC polynomial could be associated with type: " + type);
}
}
@ -155,10 +153,10 @@ public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
* @param bytes bytes.
* @param offset offset.
* @return DataChecksum of the type in the array or null in case of an error.
* @throws IOException raised on errors performing I/O.
* @throws InvalidChecksumSizeException when the stored checksum is invalid.
*/
public static DataChecksum newDataChecksum(byte[] bytes, int offset)
throws IOException {
throws InvalidChecksumSizeException {
if (offset < 0 || bytes.length < offset + getChecksumHeaderSize()) {
throw new InvalidChecksumSizeException("Could not create DataChecksum "
+ " from the byte array of length " + bytes.length

View File

@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -20,6 +20,7 @@
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.util.Objects;
import java.util.Random;
import java.util.concurrent.TimeUnit;
@ -38,19 +39,18 @@ public class TestCrcComposer {
@Rule
public Timeout globalTimeout = new Timeout(10000, TimeUnit.MILLISECONDS);
private Random rand = new Random(1234);
private final Random rand = new Random(1234);
private DataChecksum.Type type = DataChecksum.Type.CRC32C;
private DataChecksum checksum = DataChecksum.newDataChecksum(
type, Integer.MAX_VALUE);
private int dataSize = 75;
private byte[] data = new byte[dataSize];
private int chunkSize = 10;
private int cellSize = 20;
private final DataChecksum.Type type = DataChecksum.Type.CRC32C;
private final DataChecksum checksum = Objects.requireNonNull(
DataChecksum.newDataChecksum(type, Integer.MAX_VALUE));
private final int dataSize = 75;
private final byte[] data = new byte[dataSize];
private final int chunkSize = 10;
private final int cellSize = 20;
private int fullCrc;
private int[] crcsByChunk;
private int[] crcsByCell;
private byte[] crcBytesByChunk;
private byte[] crcBytesByCell;
@ -69,7 +69,7 @@ public void setup() throws IOException {
data, (crcsByChunk.length - 1) * chunkSize, dataSize % chunkSize);
// 3 cells of size cellSize, 1 cell of size (dataSize % cellSize).
crcsByCell = new int[4];
int[] crcsByCell = new int[4];
for (int i = 0; i < 3; ++i) {
crcsByCell[i] = getRangeChecksum(data, i * cellSize, cellSize);
}
@ -86,7 +86,7 @@ private int getRangeChecksum(byte[] buf, int offset, int length) {
return (int) checksum.getValue();
}
private byte[] intArrayToByteArray(int[] values) throws IOException {
private byte[] intArrayToByteArray(int[] values) {
byte[] bytes = new byte[values.length * 4];
for (int i = 0; i < values.length; ++i) {
CrcUtil.writeInt(bytes, i * 4, values[i]);
@ -95,7 +95,7 @@ private byte[] intArrayToByteArray(int[] values) throws IOException {
}
@Test
public void testUnstripedIncorrectChunkSize() throws IOException {
public void testUnstripedIncorrectChunkSize() {
CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize);
// If we incorrectly specify that all CRCs ingested correspond to chunkSize
@ -110,7 +110,7 @@ public void testUnstripedIncorrectChunkSize() throws IOException {
}
@Test
public void testUnstripedByteArray() throws IOException {
public void testUnstripedByteArray() {
CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize);
digester.update(crcBytesByChunk, 0, crcBytesByChunk.length - 4, chunkSize);
digester.update(
@ -137,7 +137,7 @@ public void testUnstripedDataInputStream() throws IOException {
}
@Test
public void testUnstripedSingleCrcs() throws IOException {
public void testUnstripedSingleCrcs() {
CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize);
for (int i = 0; i < crcsByChunk.length - 1; ++i) {
digester.update(crcsByChunk[i], chunkSize);
@ -151,7 +151,7 @@ public void testUnstripedSingleCrcs() throws IOException {
}
@Test
public void testStripedByteArray() throws IOException {
public void testStripedByteArray() {
CrcComposer digester =
CrcComposer.newStripedCrcComposer(type, chunkSize, cellSize);
digester.update(crcBytesByChunk, 0, crcBytesByChunk.length - 4, chunkSize);
@ -176,7 +176,7 @@ public void testStripedDataInputStream() throws IOException {
}
@Test
public void testStripedSingleCrcs() throws IOException {
public void testStripedSingleCrcs() {
CrcComposer digester =
CrcComposer.newStripedCrcComposer(type, chunkSize, cellSize);
for (int i = 0; i < crcsByChunk.length - 1; ++i) {
@ -225,7 +225,7 @@ public void testUpdateMismatchesStripe() throws Exception {
// boundary in a single CRC, which is not allowed, since we'd lack a
// CRC corresponding to the actual cellSize boundary.
LambdaTestUtils.intercept(
IOException.class,
IllegalStateException.class,
"stripe",
() -> digester.update(crcsByChunk[1], cellSize));
}
@ -236,7 +236,7 @@ public void testUpdateByteArrayLengthUnalignedWithCrcSize()
CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize);
LambdaTestUtils.intercept(
IOException.class,
IllegalArgumentException.class,
"length",
() -> digester.update(crcBytesByChunk, 0, 6, chunkSize));
}

View File

@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,7 +17,7 @@
*/
package org.apache.hadoop.util;
import java.io.IOException;
import java.util.Objects;
import java.util.Random;
import java.util.concurrent.TimeUnit;
@ -35,12 +35,12 @@ public class TestCrcUtil {
@Rule
public Timeout globalTimeout = new Timeout(10000, TimeUnit.MILLISECONDS);
private Random rand = new Random(1234);
private static final Random RANDOM = new Random(1234);
@Test
public void testComposeCrc32() throws IOException {
public void testComposeCrc32() {
byte[] data = new byte[64 * 1024];
rand.nextBytes(data);
RANDOM.nextBytes(data);
doTestComposeCrc(data, DataChecksum.Type.CRC32, 512, false);
doTestComposeCrc(data, DataChecksum.Type.CRC32, 511, false);
doTestComposeCrc(data, DataChecksum.Type.CRC32, 32 * 1024, false);
@ -48,9 +48,9 @@ public void testComposeCrc32() throws IOException {
}
@Test
public void testComposeCrc32c() throws IOException {
public void testComposeCrc32c() {
byte[] data = new byte[64 * 1024];
rand.nextBytes(data);
RANDOM.nextBytes(data);
doTestComposeCrc(data, DataChecksum.Type.CRC32C, 512, false);
doTestComposeCrc(data, DataChecksum.Type.CRC32C, 511, false);
doTestComposeCrc(data, DataChecksum.Type.CRC32C, 32 * 1024, false);
@ -58,9 +58,9 @@ public void testComposeCrc32c() throws IOException {
}
@Test
public void testComposeCrc32WithMonomial() throws IOException {
public void testComposeCrc32WithMonomial() {
byte[] data = new byte[64 * 1024];
rand.nextBytes(data);
RANDOM.nextBytes(data);
doTestComposeCrc(data, DataChecksum.Type.CRC32, 512, true);
doTestComposeCrc(data, DataChecksum.Type.CRC32, 511, true);
doTestComposeCrc(data, DataChecksum.Type.CRC32, 32 * 1024, true);
@ -68,9 +68,9 @@ public void testComposeCrc32WithMonomial() throws IOException {
}
@Test
public void testComposeCrc32cWithMonomial() throws IOException {
public void testComposeCrc32cWithMonomial() {
byte[] data = new byte[64 * 1024];
rand.nextBytes(data);
RANDOM.nextBytes(data);
doTestComposeCrc(data, DataChecksum.Type.CRC32C, 512, true);
doTestComposeCrc(data, DataChecksum.Type.CRC32C, 511, true);
doTestComposeCrc(data, DataChecksum.Type.CRC32C, 32 * 1024, true);
@ -78,12 +78,12 @@ public void testComposeCrc32cWithMonomial() throws IOException {
}
@Test
public void testComposeCrc32ZeroLength() throws IOException {
public void testComposeCrc32ZeroLength() {
doTestComposeCrcZerolength(DataChecksum.Type.CRC32);
}
@Test
public void testComposeCrc32CZeroLength() throws IOException {
public void testComposeCrc32CZeroLength() {
doTestComposeCrcZerolength(DataChecksum.Type.CRC32C);
}
@ -93,13 +93,13 @@ public void testComposeCrc32CZeroLength() throws IOException {
* corresponding to ever {@code chunkSize} bytes.
*/
private static void doTestComposeCrc(
byte[] data, DataChecksum.Type type, int chunkSize, boolean useMonomial)
throws IOException {
byte[] data, DataChecksum.Type type, int chunkSize, boolean useMonomial) {
int crcPolynomial = DataChecksum.getCrcPolynomialForType(type);
// Get full end-to-end CRC in a single shot first.
DataChecksum checksum = DataChecksum.newDataChecksum(
type, Integer.MAX_VALUE);
Objects.requireNonNull(checksum, "checksum");
checksum.update(data, 0, data.length);
int fullCrc = (int) checksum.getValue();
@ -145,14 +145,14 @@ private static void doTestComposeCrc(
* Helper method for testing the behavior of composing a CRC with a
* zero-length second CRC.
*/
private static void doTestComposeCrcZerolength(DataChecksum.Type type)
throws IOException {
private static void doTestComposeCrcZerolength(DataChecksum.Type type) {
// Without loss of generality, we can pick any integer as our fake crcA
// even if we don't happen to know the preimage.
int crcA = 0xCAFEBEEF;
int crcPolynomial = DataChecksum.getCrcPolynomialForType(type);
DataChecksum checksum = DataChecksum.newDataChecksum(
type, Integer.MAX_VALUE);
Objects.requireNonNull(checksum, "checksum");
int crcB = (int) checksum.getValue();
assertEquals(crcA, CrcUtil.compose(crcA, crcB, 0, crcPolynomial));
@ -162,7 +162,7 @@ private static void doTestComposeCrcZerolength(DataChecksum.Type type)
}
@Test
public void testIntSerialization() throws IOException {
public void testIntSerialization() {
byte[] bytes = CrcUtil.intToBytes(0xCAFEBEEF);
assertEquals(0xCAFEBEEF, CrcUtil.readInt(bytes, 0));
@ -180,13 +180,13 @@ public void testIntSerialization() throws IOException {
public void testToSingleCrcStringBadLength()
throws Exception {
LambdaTestUtils.intercept(
IOException.class,
IllegalArgumentException.class,
"length",
() -> CrcUtil.toSingleCrcString(new byte[8]));
}
@Test
public void testToSingleCrcString() throws IOException {
public void testToSingleCrcString() {
byte[] buf = CrcUtil.intToBytes(0xcafebeef);
assertEquals(
"0xcafebeef", CrcUtil.toSingleCrcString(buf));
@ -196,14 +196,13 @@ public void testToSingleCrcString() throws IOException {
public void testToMultiCrcStringBadLength()
throws Exception {
LambdaTestUtils.intercept(
IOException.class,
IllegalArgumentException.class,
"length",
() -> CrcUtil.toMultiCrcString(new byte[6]));
}
@Test
public void testToMultiCrcStringMultipleElements()
throws IOException {
public void testToMultiCrcStringMultipleElements() {
byte[] buf = new byte[12];
CrcUtil.writeInt(buf, 0, 0xcafebeef);
CrcUtil.writeInt(buf, 4, 0xababcccc);
@ -214,8 +213,7 @@ public void testToMultiCrcStringMultipleElements()
}
@Test
public void testToMultiCrcStringSingleElement()
throws IOException {
public void testToMultiCrcStringSingleElement() {
byte[] buf = new byte[4];
CrcUtil.writeInt(buf, 0, 0xcafebeef);
assertEquals(
@ -224,8 +222,7 @@ public void testToMultiCrcStringSingleElement()
}
@Test
public void testToMultiCrcStringNoElements()
throws IOException {
public void testToMultiCrcStringNoElements() {
assertEquals(
"[]",
CrcUtil.toMultiCrcString(new byte[0]));