From da34ecdb83dca7308ea44adf3349db7cc190ae7d Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Thu, 25 Jan 2024 10:35:32 -0800 Subject: [PATCH] HADOOP-19035. CrcUtil/CrcComposer should not throw IOException for non-IO. (#6443) --- .../org/apache/hadoop/util/CrcComposer.java | 24 +++------ .../java/org/apache/hadoop/util/CrcUtil.java | 42 +++++---------- .../org/apache/hadoop/util/DataChecksum.java | 10 ++-- .../apache/hadoop/util/TestCrcComposer.java | 38 +++++++------- .../org/apache/hadoop/util/TestCrcUtil.java | 51 +++++++++---------- 5 files changed, 67 insertions(+), 98 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcComposer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcComposer.java index 5bf773cef3..a35fe23bf4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcComposer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcComposer.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -46,7 +46,7 @@ public class CrcComposer { private int curCompositeCrc = 0; private long curPositionInStripe = 0; - private ByteArrayOutputStream digestOut = new ByteArrayOutputStream(); + private final ByteArrayOutputStream digestOut = new ByteArrayOutputStream(); /** * Returns a CrcComposer which will collapse all ingested CRCs into a single @@ -54,12 +54,10 @@ public class CrcComposer { * * @param type type. * @param bytesPerCrcHint bytesPerCrcHint. - * @throws IOException raised on errors performing I/O. * @return a CrcComposer which will collapse all ingested CRCs into a single value. */ public static CrcComposer newCrcComposer( - DataChecksum.Type type, long bytesPerCrcHint) - throws IOException { + DataChecksum.Type type, long bytesPerCrcHint) { return newStripedCrcComposer(type, bytesPerCrcHint, Long.MAX_VALUE); } @@ -78,11 +76,9 @@ public static CrcComposer newCrcComposer( * @param stripeLength stripeLength. * @return a CrcComposer which will collapse CRCs for every combined. * underlying data size which aligns with the specified stripe boundary. - * @throws IOException raised on errors performing I/O. */ public static CrcComposer newStripedCrcComposer( - DataChecksum.Type type, long bytesPerCrcHint, long stripeLength) - throws IOException { + DataChecksum.Type type, long bytesPerCrcHint, long stripeLength) { int polynomial = DataChecksum.getCrcPolynomialForType(type); return new CrcComposer( polynomial, @@ -118,13 +114,10 @@ public static CrcComposer newStripedCrcComposer( * @param offset offset. * @param length must be a multiple of the expected byte-size of a CRC. * @param bytesPerCrc bytesPerCrc. - * @throws IOException raised on errors performing I/O. */ - public void update( - byte[] crcBuffer, int offset, int length, long bytesPerCrc) - throws IOException { + public void update(byte[] crcBuffer, int offset, int length, long bytesPerCrc) { if (length % CRC_SIZE_BYTES != 0) { - throw new IOException(String.format( + throw new IllegalArgumentException(String.format( "Trying to update CRC from byte array with length '%d' at offset " + "'%d' which is not a multiple of %d!", length, offset, CRC_SIZE_BYTES)); @@ -162,9 +155,8 @@ public void update( * * @param crcB crcB. * @param bytesPerCrc bytesPerCrc. - * @throws IOException raised on errors performing I/O. */ - public void update(int crcB, long bytesPerCrc) throws IOException { + public void update(int crcB, long bytesPerCrc) { if (curCompositeCrc == 0) { curCompositeCrc = crcB; } else if (bytesPerCrc == bytesPerCrcHint) { @@ -178,7 +170,7 @@ public void update(int crcB, long bytesPerCrc) throws IOException { curPositionInStripe += bytesPerCrc; if (curPositionInStripe > stripeLength) { - throw new IOException(String.format( + throw new IllegalStateException(String.format( "Current position in stripe '%d' after advancing by bytesPerCrc '%d' " + "exceeds stripeLength '%d' without stripe alignment.", curPositionInStripe, bytesPerCrc, stripeLength)); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java index c8183b042f..973363bf8d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/CrcUtil.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -21,7 +21,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import java.io.IOException; import java.util.Arrays; /** @@ -112,15 +111,7 @@ public static int compose(int crcA, int crcB, long lengthB, int mod) { */ public static byte[] intToBytes(int value) { byte[] buf = new byte[4]; - try { - writeInt(buf, 0, value); - } catch (IOException ioe) { - // Since this should only be able to occur from code bugs within this - // class rather than user input, we throw as a RuntimeException - // rather than requiring this method to declare throwing IOException - // for something the caller can't control. - throw new RuntimeException(ioe); - } + writeInt(buf, 0, value); return buf; } @@ -132,16 +123,14 @@ public static byte[] intToBytes(int value) { * @param buf buf size. * @param offset offset. * @param value value. - * @throws IOException raised on errors performing I/O. */ - public static void writeInt(byte[] buf, int offset, int value) - throws IOException { + public static void writeInt(byte[] buf, int offset, int value) { if (offset + 4 > buf.length) { - throw new IOException(String.format( + throw new ArrayIndexOutOfBoundsException(String.format( "writeInt out of bounds: buf.length=%d, offset=%d", buf.length, offset)); } - buf[offset + 0] = (byte)((value >>> 24) & 0xff); + buf[offset ] = (byte)((value >>> 24) & 0xff); buf[offset + 1] = (byte)((value >>> 16) & 0xff); buf[offset + 2] = (byte)((value >>> 8) & 0xff); buf[offset + 3] = (byte)(value & 0xff); @@ -154,20 +143,17 @@ public static void writeInt(byte[] buf, int offset, int value) * @param offset offset. * @param buf buf. * @return int. - * @throws IOException raised on errors performing I/O. */ - public static int readInt(byte[] buf, int offset) - throws IOException { + public static int readInt(byte[] buf, int offset) { if (offset + 4 > buf.length) { - throw new IOException(String.format( + throw new ArrayIndexOutOfBoundsException(String.format( "readInt out of bounds: buf.length=%d, offset=%d", buf.length, offset)); } - int value = ((buf[offset + 0] & 0xff) << 24) | + return ((buf[offset ] & 0xff) << 24) | ((buf[offset + 1] & 0xff) << 16) | ((buf[offset + 2] & 0xff) << 8) | ((buf[offset + 3] & 0xff)); - return value; } /** @@ -176,13 +162,11 @@ public static int readInt(byte[] buf, int offset) * formatted value. * * @param bytes bytes. - * @throws IOException raised on errors performing I/O. * @return a list of hex formatted values. */ - public static String toSingleCrcString(final byte[] bytes) - throws IOException { + public static String toSingleCrcString(final byte[] bytes) { if (bytes.length != 4) { - throw new IOException((String.format( + throw new IllegalArgumentException((String.format( "Unexpected byte[] length '%d' for single CRC. Contents: %s", bytes.length, Arrays.toString(bytes)))); } @@ -195,13 +179,11 @@ public static String toSingleCrcString(final byte[] bytes) * hex formatted values. * * @param bytes bytes. - * @throws IOException raised on errors performing I/O. * @return a list of hex formatted values. */ - public static String toMultiCrcString(final byte[] bytes) - throws IOException { + public static String toMultiCrcString(final byte[] bytes) { if (bytes.length % 4 != 0) { - throw new IOException((String.format( + throw new IllegalArgumentException((String.format( "Unexpected byte[] length '%d' not divisible by 4. Contents: %s", bytes.length, Arrays.toString(bytes)))); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java index 1c37d5944c..aac0936a66 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java @@ -117,17 +117,15 @@ static Checksum newCrc32C() { * @param type type. * @return the int representation of the polynomial associated with the * CRC {@code type}, suitable for use with further CRC arithmetic. - * @throws IOException if there is no CRC polynomial applicable - * to the given {@code type}. */ - public static int getCrcPolynomialForType(Type type) throws IOException { + public static int getCrcPolynomialForType(Type type) { switch (type) { case CRC32: return CrcUtil.GZIP_POLYNOMIAL; case CRC32C: return CrcUtil.CASTAGNOLI_POLYNOMIAL; default: - throw new IOException( + throw new IllegalArgumentException( "No CRC polynomial could be associated with type: " + type); } } @@ -155,10 +153,10 @@ public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) { * @param bytes bytes. * @param offset offset. * @return DataChecksum of the type in the array or null in case of an error. - * @throws IOException raised on errors performing I/O. + * @throws InvalidChecksumSizeException when the stored checksum is invalid. */ public static DataChecksum newDataChecksum(byte[] bytes, int offset) - throws IOException { + throws InvalidChecksumSizeException { if (offset < 0 || bytes.length < offset + getChecksumHeaderSize()) { throw new InvalidChecksumSizeException("Could not create DataChecksum " + " from the byte array of length " + bytes.length diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCrcComposer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCrcComposer.java index 5d8dcfb1be..cf437f3854 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCrcComposer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCrcComposer.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -20,6 +20,7 @@ import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.IOException; +import java.util.Objects; import java.util.Random; import java.util.concurrent.TimeUnit; @@ -38,19 +39,18 @@ public class TestCrcComposer { @Rule public Timeout globalTimeout = new Timeout(10000, TimeUnit.MILLISECONDS); - private Random rand = new Random(1234); + private final Random rand = new Random(1234); - private DataChecksum.Type type = DataChecksum.Type.CRC32C; - private DataChecksum checksum = DataChecksum.newDataChecksum( - type, Integer.MAX_VALUE); - private int dataSize = 75; - private byte[] data = new byte[dataSize]; - private int chunkSize = 10; - private int cellSize = 20; + private final DataChecksum.Type type = DataChecksum.Type.CRC32C; + private final DataChecksum checksum = Objects.requireNonNull( + DataChecksum.newDataChecksum(type, Integer.MAX_VALUE)); + private final int dataSize = 75; + private final byte[] data = new byte[dataSize]; + private final int chunkSize = 10; + private final int cellSize = 20; private int fullCrc; private int[] crcsByChunk; - private int[] crcsByCell; private byte[] crcBytesByChunk; private byte[] crcBytesByCell; @@ -69,7 +69,7 @@ public void setup() throws IOException { data, (crcsByChunk.length - 1) * chunkSize, dataSize % chunkSize); // 3 cells of size cellSize, 1 cell of size (dataSize % cellSize). - crcsByCell = new int[4]; + int[] crcsByCell = new int[4]; for (int i = 0; i < 3; ++i) { crcsByCell[i] = getRangeChecksum(data, i * cellSize, cellSize); } @@ -86,7 +86,7 @@ private int getRangeChecksum(byte[] buf, int offset, int length) { return (int) checksum.getValue(); } - private byte[] intArrayToByteArray(int[] values) throws IOException { + private byte[] intArrayToByteArray(int[] values) { byte[] bytes = new byte[values.length * 4]; for (int i = 0; i < values.length; ++i) { CrcUtil.writeInt(bytes, i * 4, values[i]); @@ -95,7 +95,7 @@ private byte[] intArrayToByteArray(int[] values) throws IOException { } @Test - public void testUnstripedIncorrectChunkSize() throws IOException { + public void testUnstripedIncorrectChunkSize() { CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize); // If we incorrectly specify that all CRCs ingested correspond to chunkSize @@ -110,7 +110,7 @@ public void testUnstripedIncorrectChunkSize() throws IOException { } @Test - public void testUnstripedByteArray() throws IOException { + public void testUnstripedByteArray() { CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize); digester.update(crcBytesByChunk, 0, crcBytesByChunk.length - 4, chunkSize); digester.update( @@ -137,7 +137,7 @@ public void testUnstripedDataInputStream() throws IOException { } @Test - public void testUnstripedSingleCrcs() throws IOException { + public void testUnstripedSingleCrcs() { CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize); for (int i = 0; i < crcsByChunk.length - 1; ++i) { digester.update(crcsByChunk[i], chunkSize); @@ -151,7 +151,7 @@ public void testUnstripedSingleCrcs() throws IOException { } @Test - public void testStripedByteArray() throws IOException { + public void testStripedByteArray() { CrcComposer digester = CrcComposer.newStripedCrcComposer(type, chunkSize, cellSize); digester.update(crcBytesByChunk, 0, crcBytesByChunk.length - 4, chunkSize); @@ -176,7 +176,7 @@ public void testStripedDataInputStream() throws IOException { } @Test - public void testStripedSingleCrcs() throws IOException { + public void testStripedSingleCrcs() { CrcComposer digester = CrcComposer.newStripedCrcComposer(type, chunkSize, cellSize); for (int i = 0; i < crcsByChunk.length - 1; ++i) { @@ -225,7 +225,7 @@ public void testUpdateMismatchesStripe() throws Exception { // boundary in a single CRC, which is not allowed, since we'd lack a // CRC corresponding to the actual cellSize boundary. LambdaTestUtils.intercept( - IOException.class, + IllegalStateException.class, "stripe", () -> digester.update(crcsByChunk[1], cellSize)); } @@ -236,7 +236,7 @@ public void testUpdateByteArrayLengthUnalignedWithCrcSize() CrcComposer digester = CrcComposer.newCrcComposer(type, chunkSize); LambdaTestUtils.intercept( - IOException.class, + IllegalArgumentException.class, "length", () -> digester.update(crcBytesByChunk, 0, 6, chunkSize)); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCrcUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCrcUtil.java index b4355b1513..af96fdf541 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCrcUtil.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestCrcUtil.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,7 +17,7 @@ */ package org.apache.hadoop.util; -import java.io.IOException; +import java.util.Objects; import java.util.Random; import java.util.concurrent.TimeUnit; @@ -35,12 +35,12 @@ public class TestCrcUtil { @Rule public Timeout globalTimeout = new Timeout(10000, TimeUnit.MILLISECONDS); - private Random rand = new Random(1234); + private static final Random RANDOM = new Random(1234); @Test - public void testComposeCrc32() throws IOException { + public void testComposeCrc32() { byte[] data = new byte[64 * 1024]; - rand.nextBytes(data); + RANDOM.nextBytes(data); doTestComposeCrc(data, DataChecksum.Type.CRC32, 512, false); doTestComposeCrc(data, DataChecksum.Type.CRC32, 511, false); doTestComposeCrc(data, DataChecksum.Type.CRC32, 32 * 1024, false); @@ -48,9 +48,9 @@ public void testComposeCrc32() throws IOException { } @Test - public void testComposeCrc32c() throws IOException { + public void testComposeCrc32c() { byte[] data = new byte[64 * 1024]; - rand.nextBytes(data); + RANDOM.nextBytes(data); doTestComposeCrc(data, DataChecksum.Type.CRC32C, 512, false); doTestComposeCrc(data, DataChecksum.Type.CRC32C, 511, false); doTestComposeCrc(data, DataChecksum.Type.CRC32C, 32 * 1024, false); @@ -58,9 +58,9 @@ public void testComposeCrc32c() throws IOException { } @Test - public void testComposeCrc32WithMonomial() throws IOException { + public void testComposeCrc32WithMonomial() { byte[] data = new byte[64 * 1024]; - rand.nextBytes(data); + RANDOM.nextBytes(data); doTestComposeCrc(data, DataChecksum.Type.CRC32, 512, true); doTestComposeCrc(data, DataChecksum.Type.CRC32, 511, true); doTestComposeCrc(data, DataChecksum.Type.CRC32, 32 * 1024, true); @@ -68,9 +68,9 @@ public void testComposeCrc32WithMonomial() throws IOException { } @Test - public void testComposeCrc32cWithMonomial() throws IOException { + public void testComposeCrc32cWithMonomial() { byte[] data = new byte[64 * 1024]; - rand.nextBytes(data); + RANDOM.nextBytes(data); doTestComposeCrc(data, DataChecksum.Type.CRC32C, 512, true); doTestComposeCrc(data, DataChecksum.Type.CRC32C, 511, true); doTestComposeCrc(data, DataChecksum.Type.CRC32C, 32 * 1024, true); @@ -78,12 +78,12 @@ public void testComposeCrc32cWithMonomial() throws IOException { } @Test - public void testComposeCrc32ZeroLength() throws IOException { + public void testComposeCrc32ZeroLength() { doTestComposeCrcZerolength(DataChecksum.Type.CRC32); } @Test - public void testComposeCrc32CZeroLength() throws IOException { + public void testComposeCrc32CZeroLength() { doTestComposeCrcZerolength(DataChecksum.Type.CRC32C); } @@ -93,13 +93,13 @@ public void testComposeCrc32CZeroLength() throws IOException { * corresponding to ever {@code chunkSize} bytes. */ private static void doTestComposeCrc( - byte[] data, DataChecksum.Type type, int chunkSize, boolean useMonomial) - throws IOException { + byte[] data, DataChecksum.Type type, int chunkSize, boolean useMonomial) { int crcPolynomial = DataChecksum.getCrcPolynomialForType(type); // Get full end-to-end CRC in a single shot first. DataChecksum checksum = DataChecksum.newDataChecksum( type, Integer.MAX_VALUE); + Objects.requireNonNull(checksum, "checksum"); checksum.update(data, 0, data.length); int fullCrc = (int) checksum.getValue(); @@ -145,14 +145,14 @@ private static void doTestComposeCrc( * Helper method for testing the behavior of composing a CRC with a * zero-length second CRC. */ - private static void doTestComposeCrcZerolength(DataChecksum.Type type) - throws IOException { + private static void doTestComposeCrcZerolength(DataChecksum.Type type) { // Without loss of generality, we can pick any integer as our fake crcA // even if we don't happen to know the preimage. int crcA = 0xCAFEBEEF; int crcPolynomial = DataChecksum.getCrcPolynomialForType(type); DataChecksum checksum = DataChecksum.newDataChecksum( type, Integer.MAX_VALUE); + Objects.requireNonNull(checksum, "checksum"); int crcB = (int) checksum.getValue(); assertEquals(crcA, CrcUtil.compose(crcA, crcB, 0, crcPolynomial)); @@ -162,7 +162,7 @@ private static void doTestComposeCrcZerolength(DataChecksum.Type type) } @Test - public void testIntSerialization() throws IOException { + public void testIntSerialization() { byte[] bytes = CrcUtil.intToBytes(0xCAFEBEEF); assertEquals(0xCAFEBEEF, CrcUtil.readInt(bytes, 0)); @@ -180,13 +180,13 @@ public void testIntSerialization() throws IOException { public void testToSingleCrcStringBadLength() throws Exception { LambdaTestUtils.intercept( - IOException.class, + IllegalArgumentException.class, "length", () -> CrcUtil.toSingleCrcString(new byte[8])); } @Test - public void testToSingleCrcString() throws IOException { + public void testToSingleCrcString() { byte[] buf = CrcUtil.intToBytes(0xcafebeef); assertEquals( "0xcafebeef", CrcUtil.toSingleCrcString(buf)); @@ -196,14 +196,13 @@ public void testToSingleCrcString() throws IOException { public void testToMultiCrcStringBadLength() throws Exception { LambdaTestUtils.intercept( - IOException.class, + IllegalArgumentException.class, "length", () -> CrcUtil.toMultiCrcString(new byte[6])); } @Test - public void testToMultiCrcStringMultipleElements() - throws IOException { + public void testToMultiCrcStringMultipleElements() { byte[] buf = new byte[12]; CrcUtil.writeInt(buf, 0, 0xcafebeef); CrcUtil.writeInt(buf, 4, 0xababcccc); @@ -214,8 +213,7 @@ public void testToMultiCrcStringMultipleElements() } @Test - public void testToMultiCrcStringSingleElement() - throws IOException { + public void testToMultiCrcStringSingleElement() { byte[] buf = new byte[4]; CrcUtil.writeInt(buf, 0, 0xcafebeef); assertEquals( @@ -224,8 +222,7 @@ public void testToMultiCrcStringSingleElement() } @Test - public void testToMultiCrcStringNoElements() - throws IOException { + public void testToMultiCrcStringNoElements() { assertEquals( "[]", CrcUtil.toMultiCrcString(new byte[0]));