diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 21d8202680..ff36e1da89 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -647,6 +647,9 @@ Trunk (Unreleased) HADOOP-12757. Findbug compilation fails for 'Kafka Library support'. (aajisaka) + HADOOP-12041. Implement another Reed-Solomon coder in pure Java. + (Kai Zheng via zhz) + Release 2.9.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/AbstractRawErasureDecoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/AbstractRawErasureDecoder.java index 37a9bcd99c..cf2b7389b5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/AbstractRawErasureDecoder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/AbstractRawErasureDecoder.java @@ -20,9 +20,9 @@ import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.io.erasurecode.ECChunk; +import org.apache.hadoop.io.erasurecode.rawcoder.util.CoderUtil; import java.nio.ByteBuffer; -import java.util.Arrays; /** * An abstract raw erasure decoder that's to be inherited by new decoders. 
@@ -42,7 +42,7 @@ public void decode(ByteBuffer[] inputs, int[] erasedIndexes, ByteBuffer[] outputs) { checkParameters(inputs, erasedIndexes, outputs); - ByteBuffer validInput = findFirstValidInput(inputs); + ByteBuffer validInput = CoderUtil.findFirstValidInput(inputs); boolean usingDirectBuffer = validInput.isDirect(); int dataLen = validInput.remaining(); if (dataLen == 0) { @@ -106,7 +106,7 @@ protected abstract void doDecode(ByteBuffer[] inputs, int[] erasedIndexes, public void decode(byte[][] inputs, int[] erasedIndexes, byte[][] outputs) { checkParameters(inputs, erasedIndexes, outputs); - byte[] validInput = findFirstValidInput(inputs); + byte[] validInput = CoderUtil.findFirstValidInput(inputs); int dataLen = validInput.length; if (dataLen == 0) { return; @@ -178,37 +178,4 @@ protected void checkParameters(T[] inputs, int[] erasedIndexes, "No enough valid inputs are provided, not recoverable"); } } - - /** - * Get indexes into inputs array for items marked as null, either erased or - * not to read. - * @return indexes into inputs array - */ - protected int[] getErasedOrNotToReadIndexes(T[] inputs) { - int[] invalidIndexes = new int[inputs.length]; - int idx = 0; - for (int i = 0; i < inputs.length; i++) { - if (inputs[i] == null) { - invalidIndexes[idx++] = i; - } - } - - return Arrays.copyOf(invalidIndexes, idx); - } - - /** - * Find the valid input from all the inputs. 
- * @param inputs input buffers to look for valid input - * @return the first valid input - */ - protected static T findFirstValidInput(T[] inputs) { - for (T input : inputs) { - if (input != null) { - return input; - } - } - - throw new HadoopIllegalArgumentException( - "Invalid inputs are found, all being null"); - } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder.java index 87347c0af1..b7ee49de78 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder.java @@ -19,6 +19,7 @@ import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.io.erasurecode.rawcoder.util.CoderUtil; import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil; import java.nio.ByteBuffer; @@ -103,7 +104,7 @@ public void decode(byte[][] inputs, int[] erasedIndexes, byte[][] outputs) { private void doDecodeImpl(ByteBuffer[] inputs, int[] erasedIndexes, ByteBuffer[] outputs) { - ByteBuffer valid = findFirstValidInput(inputs); + ByteBuffer valid = CoderUtil.findFirstValidInput(inputs); int dataLen = valid.remaining(); for (int i = 0; i < erasedIndexes.length; i++) { errSignature[i] = primitivePower[erasedIndexes[i]]; @@ -136,7 +137,8 @@ protected void doDecode(byte[][] inputs, int[] inputOffsets, * implementations, so we have to adjust them before calling doDecodeImpl. 
*/ - int[] erasedOrNotToReadIndexes = getErasedOrNotToReadIndexes(inputs); + int[] erasedOrNotToReadIndexes = + CoderUtil.getErasedOrNotToReadIndexes(inputs); // Prepare for adjustedOutputsParameter @@ -181,7 +183,7 @@ protected void doDecode(byte[][] inputs, int[] inputOffsets, @Override protected void doDecode(ByteBuffer[] inputs, int[] erasedIndexes, ByteBuffer[] outputs) { - ByteBuffer validInput = findFirstValidInput(inputs); + ByteBuffer validInput = CoderUtil.findFirstValidInput(inputs); int dataLen = validInput.remaining(); /** @@ -189,7 +191,8 @@ protected void doDecode(ByteBuffer[] inputs, int[] erasedIndexes, * implementations, so we have to adjust them before calling doDecodeImpl. */ - int[] erasedOrNotToReadIndexes = getErasedOrNotToReadIndexes(inputs); + int[] erasedOrNotToReadIndexes = + CoderUtil.getErasedOrNotToReadIndexes(inputs); // Prepare for adjustedDirectBufferOutputsParameter diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder2.java new file mode 100644 index 0000000000..48a3579d13 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder2.java @@ -0,0 +1,176 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.rawcoder; + +import org.apache.hadoop.HadoopIllegalArgumentException; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.io.erasurecode.rawcoder.util.CoderUtil; +import org.apache.hadoop.io.erasurecode.rawcoder.util.DumpUtil; +import org.apache.hadoop.io.erasurecode.rawcoder.util.GF256; +import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil; +import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil2; + +import java.nio.ByteBuffer; +import java.util.Arrays; + +/** + * A raw erasure decoder in RS code scheme in pure Java in case native one + * isn't available in some environment. Please always use native implementations + * when possible. This new Java coder is about 5X faster than the one originated + * from HDFS-RAID, and also compatible with the native/ISA-L coder. + */ +@InterfaceAudience.Private +public class RSRawDecoder2 extends AbstractRawErasureDecoder { + // Relevant to the schema only; built once in the constructor and not changed by decode calls + private byte[] encodeMatrix; + + /** + * Below are relevant to schema and erased indexes, thus may change during + * decode calls. + */ + private byte[] decodeMatrix; + private byte[] invertMatrix; + /** + * Array of input tables generated from coding coefficients previously. 
+ * Must be of size 32*k*rows + */ + private byte[] gfTables; + private int[] cachedErasedIndexes; + private int[] validIndexes; + private int numErasedDataUnits; + private boolean[] erasureFlags; + + public RSRawDecoder2(int numDataUnits, int numParityUnits) { + super(numDataUnits, numParityUnits); + if (numDataUnits + numParityUnits >= RSUtil.GF.getFieldSize()) { + throw new HadoopIllegalArgumentException( + "Invalid numDataUnits and numParityUnits"); + } + + int numAllUnits = getNumDataUnits() + numParityUnits; + encodeMatrix = new byte[numAllUnits * getNumDataUnits()]; + RSUtil2.genCauchyMatrix(encodeMatrix, numAllUnits, getNumDataUnits()); + if (isAllowingVerboseDump()) { + DumpUtil.dumpMatrix(encodeMatrix, numDataUnits, numAllUnits); + } + } + + @Override + protected void doDecode(ByteBuffer[] inputs, int[] erasedIndexes, + ByteBuffer[] outputs) { + prepareDecoding(inputs, erasedIndexes); + + ByteBuffer[] realInputs = new ByteBuffer[getNumDataUnits()]; + for (int i = 0; i < getNumDataUnits(); i++) { + realInputs[i] = inputs[validIndexes[i]]; + } + RSUtil2.encodeData(gfTables, realInputs, outputs); + } + + @Override + protected void doDecode(byte[][] inputs, int[] inputOffsets, + int dataLen, int[] erasedIndexes, + byte[][] outputs, int[] outputOffsets) { + prepareDecoding(inputs, erasedIndexes); + + byte[][] realInputs = new byte[getNumDataUnits()][]; + int[] realInputOffsets = new int[getNumDataUnits()]; + for (int i = 0; i < getNumDataUnits(); i++) { + realInputs[i] = inputs[validIndexes[i]]; + realInputOffsets[i] = inputOffsets[validIndexes[i]]; + } + RSUtil2.encodeData(gfTables, dataLen, realInputs, realInputOffsets, + outputs, outputOffsets); + } + + private <T> void prepareDecoding(T[] inputs, int[] erasedIndexes) { + int[] tmpValidIndexes = new int[getNumDataUnits()]; + CoderUtil.makeValidIndexes(inputs, tmpValidIndexes); + if (Arrays.equals(this.cachedErasedIndexes, erasedIndexes) && + Arrays.equals(this.validIndexes, tmpValidIndexes)) { + return; // 
Optimization: same erased and valid indexes as the previous call, so the cached matrices and tables are reused + } + this.cachedErasedIndexes = + Arrays.copyOf(erasedIndexes, erasedIndexes.length); + this.validIndexes = + Arrays.copyOf(tmpValidIndexes, tmpValidIndexes.length); + + processErasures(erasedIndexes); + } + + private void processErasures(int[] erasedIndexes) { + this.decodeMatrix = new byte[getNumAllUnits() * getNumDataUnits()]; + this.invertMatrix = new byte[getNumAllUnits() * getNumDataUnits()]; + this.gfTables = new byte[getNumAllUnits() * getNumDataUnits() * 32]; + + this.erasureFlags = new boolean[getNumAllUnits()]; + this.numErasedDataUnits = 0; + + for (int i = 0; i < erasedIndexes.length; i++) { + int index = erasedIndexes[i]; + erasureFlags[index] = true; + if (index < getNumDataUnits()) { + numErasedDataUnits++; + } + } + + generateDecodeMatrix(erasedIndexes); + + RSUtil2.initTables(getNumDataUnits(), erasedIndexes.length, + decodeMatrix, 0, gfTables); + if (isAllowingVerboseDump()) { + System.out.println(DumpUtil.bytesToHex(gfTables, -1)); + } + } + + // Generate decode matrix from encode matrix: invert the rows of the valid units, then fill one decode row per erased unit + private void generateDecodeMatrix(int[] erasedIndexes) { + int i, j, r, p; + byte s; + byte[] tmpMatrix = new byte[getNumAllUnits() * getNumDataUnits()]; + + // Construct matrix tmpMatrix from the encode matrix rows of the valid (non-null) inputs + for (i = 0; i < getNumDataUnits(); i++) { + r = validIndexes[i]; + for (j = 0; j < getNumDataUnits(); j++) { + tmpMatrix[getNumDataUnits() * i + j] = + encodeMatrix[getNumDataUnits() * r + j]; + } + } + + GF256.gfInvertMatrix(tmpMatrix, invertMatrix, getNumDataUnits()); + + for (i = 0; i < numErasedDataUnits; i++) { + for (j = 0; j < getNumDataUnits(); j++) { + decodeMatrix[getNumDataUnits() * i + j] = + invertMatrix[getNumDataUnits() * erasedIndexes[i] + j]; + } + } + + for (p = numErasedDataUnits; p < erasedIndexes.length; p++) { + for (i = 0; i < getNumDataUnits(); i++) { + s = 0; + for (j = 0; j < getNumDataUnits(); j++) { + s ^= GF256.gfMul(invertMatrix[j * getNumDataUnits() + i], + encodeMatrix[getNumDataUnits() * 
erasedIndexes[p] + j]); + } + decodeMatrix[getNumDataUnits() * p + i] = s; + } + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawEncoder2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawEncoder2.java new file mode 100644 index 0000000000..72d77f7bae --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawEncoder2.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.rawcoder; + +import org.apache.hadoop.HadoopIllegalArgumentException; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.io.erasurecode.rawcoder.util.DumpUtil; +import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil; +import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil2; + +import java.nio.ByteBuffer; + +/** + * A raw erasure encoder in RS code scheme in pure Java in case native one + * isn't available in some environment. Please always use native implementations + * when possible. 
This new Java coder is about 5X faster than the one originated + * from HDFS-RAID, and also compatible with the native/ISA-L coder. + */ +@InterfaceAudience.Private +public class RSRawEncoder2 extends AbstractRawErasureEncoder { + // Relevant to the schema only; built once in the constructor and not changed by encode calls. + private byte[] encodeMatrix; + /** + * Array of input tables, filled by the RSUtil2.initTables call in the constructor from the encode matrix. + * Must be of size 32*k*rows; allocated here as 32 * numDataUnits * getNumAllUnits() bytes. + */ + private byte[] gfTables; + + public RSRawEncoder2(int numDataUnits, int numParityUnits) { + super(numDataUnits, numParityUnits); + + if (numDataUnits + numParityUnits >= RSUtil.GF.getFieldSize()) { + throw new HadoopIllegalArgumentException( + "Invalid numDataUnits and numParityUnits"); + } + + encodeMatrix = new byte[getNumAllUnits() * numDataUnits]; + RSUtil2.genCauchyMatrix(encodeMatrix, getNumAllUnits(), numDataUnits); + if (isAllowingVerboseDump()) { + DumpUtil.dumpMatrix(encodeMatrix, numDataUnits, getNumAllUnits()); + } + gfTables = new byte[getNumAllUnits() * numDataUnits * 32]; + RSUtil2.initTables(numDataUnits, numParityUnits, encodeMatrix, + numDataUnits * numDataUnits, gfTables); + if (isAllowingVerboseDump()) { + System.out.println(DumpUtil.bytesToHex(gfTables, -1)); + } + } + + @Override + protected void doEncode(ByteBuffer[] inputs, ByteBuffer[] outputs) { + RSUtil2.encodeData(gfTables, inputs, outputs); + } + + @Override + protected void doEncode(byte[][] inputs, int[] inputOffsets, + int dataLen, byte[][] outputs, int[] outputOffsets) { + RSUtil2.encodeData(gfTables, dataLen, inputs, inputOffsets, outputs, + outputOffsets); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawErasureCoderFactory2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawErasureCoderFactory2.java new file mode 100644 index 0000000000..40a0f1c0bf --- /dev/null +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawErasureCoderFactory2.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.rawcoder; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * A raw coder factory that creates the pure-Java raw Reed-Solomon coders, {@link RSRawEncoder2} and {@link RSRawDecoder2}. 
+ */ +@InterfaceAudience.Private +public class RSRawErasureCoderFactory2 implements RawErasureCoderFactory { + + @Override + public RawErasureEncoder createEncoder(int numDataUnits, int numParityUnits) { + return new RSRawEncoder2(numDataUnits, numParityUnits); + } + + @Override + public RawErasureDecoder createDecoder(int numDataUnits, int numParityUnits) { + return new RSRawDecoder2(numDataUnits, numParityUnits); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureCoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureCoder.java index dbe2da9881..20a1a690e1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureCoder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureCoder.java @@ -40,7 +40,7 @@ public interface RawErasureCoder extends Configurable { /** * Get a coder option value. * @param option - * @return + * @return option value */ public Object getCoderOption(CoderOption option); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/CoderUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/CoderUtil.java new file mode 100644 index 0000000000..07d15bed9d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/CoderUtil.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.rawcoder.util; + +import org.apache.hadoop.HadoopIllegalArgumentException; +import org.apache.hadoop.classification.InterfaceAudience; + +import java.util.Arrays; + +/** + * Helpful utilities for implementing some raw erasure coders. + */ +@InterfaceAudience.Private +public final class CoderUtil { + + private CoderUtil() { + // Utility class, not to be instantiated + } + + + /** + * Get indexes into inputs array for items marked as null, either erased or + * not to read. + * @return indexes of the null items in the inputs array, in ascending order + */ + public static <T> int[] getErasedOrNotToReadIndexes(T[] inputs) { + int[] invalidIndexes = new int[inputs.length]; + int idx = 0; + for (int i = 0; i < inputs.length; i++) { + if (inputs[i] == null) { + invalidIndexes[idx++] = i; + } + } + + return Arrays.copyOf(invalidIndexes, idx); + } + + /** + * Find the first valid (non-null) input from all the inputs. + * @param inputs input buffers to look for valid input + * @return the first valid input; a HadoopIllegalArgumentException is thrown if all inputs are null + */ + public static <T> T findFirstValidInput(T[] inputs) { + for (T input : inputs) { + if (input != null) { + return input; + } + } + + throw new HadoopIllegalArgumentException( + "Invalid inputs are found, all being null"); + } + + /** + * Pick up the indexes of the valid (non-null) inputs, in ascending order, until validIndexes is full. 
+ * @param inputs actually decoding input buffers + * @param validIndexes an array to be filled with the indexes of the valid inputs + * @param <T> the type of the input buffers + */ + public static <T> void makeValidIndexes(T[] inputs, int[] validIndexes) { + int idx = 0; + for (int i = 0; i < inputs.length && idx < validIndexes.length; i++) { + if (inputs[i] != null) { + validIndexes[idx++] = i; + } + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/DumpUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/DumpUtil.java index 785539cbee..6de0716174 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/DumpUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/DumpUtil.java @@ -21,8 +21,8 @@ import org.apache.hadoop.io.erasurecode.ECChunk; /** - * A dump utility class for debugging data erasure coding/decoding issues. Don't - * suggest they are used in runtime production codes. + * A dump utility class for debugging data erasure coding/decoding issues. + * Don't suggest they are used in runtime production codes. */ @InterfaceAudience.Private public final class DumpUtil { @@ -35,9 +35,10 @@ private DumpUtil() { /** * Convert bytes into format like 0x02 02 00 80. + * If limit is not positive or too large, then all bytes will be converted. 
*/ public static String bytesToHex(byte[] bytes, int limit) { - if (limit > bytes.length) { + if (limit <= 0 || limit > bytes.length) { limit = bytes.length; } int len = limit * 2; @@ -56,6 +57,24 @@ public static String bytesToHex(byte[] bytes, int limit) { return new String(hexChars); } + /** + * Print a matrix stored row by row in a flat array, one matrix row per line. + * The matrix has numAllUnits rows of numDataUnits columns each, as the encode matrix does. + * @param matrix the matrix bytes, printed as unsigned decimal values + * @param numDataUnits number of columns in the matrix + * @param numAllUnits number of rows in the matrix + */ + public static void dumpMatrix(byte[] matrix, + int numDataUnits, int numAllUnits) { + for (int i = 0; i < numAllUnits; i++) { + for (int j = 0; j < numDataUnits; j++) { + System.out.print(" "); + System.out.print(0xff & matrix[i * numDataUnits + j]); + } + System.out.println(); + } + } + /** * Print data in hex format in an array of chunks. * @param header diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GF256.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GF256.java new file mode 100644 index 0000000000..03e21b6089 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GF256.java @@ -0,0 +1,339 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.erasurecode.rawcoder.util; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * A GaloisField utility class only caring of 256 fields for efficiency. Some + * of the codes are borrowed from ISA-L implementation (C or ASM codes). + */ +@InterfaceAudience.Private +public final class GF256 { + + private GF256() { } + + public static byte[] gfBase() { + return GF_BASE; + } + + private static final byte[] GF_BASE = new byte[] { + (byte) 0x01, (byte) 0x02, (byte) 0x04, (byte) 0x08, (byte) 0x10, + (byte) 0x20, (byte) 0x40, (byte) 0x80, (byte) 0x1d, (byte) 0x3a, + (byte) 0x74, (byte) 0xe8, (byte) 0xcd, (byte) 0x87, (byte) 0x13, + (byte) 0x26, (byte) 0x4c, (byte) 0x98, (byte) 0x2d, (byte) 0x5a, + (byte) 0xb4, (byte) 0x75, (byte) 0xea, (byte) 0xc9, (byte) 0x8f, + (byte) 0x03, (byte) 0x06, (byte) 0x0c, (byte) 0x18, (byte) 0x30, + (byte) 0x60, (byte) 0xc0, (byte) 0x9d, (byte) 0x27, (byte) 0x4e, + (byte) 0x9c, (byte) 0x25, (byte) 0x4a, (byte) 0x94, (byte) 0x35, + (byte) 0x6a, (byte) 0xd4, (byte) 0xb5, (byte) 0x77, (byte) 0xee, + (byte) 0xc1, (byte) 0x9f, (byte) 0x23, (byte) 0x46, (byte) 0x8c, + (byte) 0x05, (byte) 0x0a, (byte) 0x14, (byte) 0x28, (byte) 0x50, + (byte) 0xa0, (byte) 0x5d, (byte) 0xba, (byte) 0x69, (byte) 0xd2, + (byte) 0xb9, (byte) 0x6f, (byte) 0xde, (byte) 0xa1, (byte) 0x5f, + (byte) 0xbe, (byte) 0x61, (byte) 0xc2, (byte) 0x99, (byte) 0x2f, + (byte) 0x5e, (byte) 0xbc, (byte) 0x65, (byte) 0xca, (byte) 0x89, + (byte) 0x0f, (byte) 0x1e, (byte) 0x3c, (byte) 0x78, (byte) 0xf0, + (byte) 0xfd, (byte) 0xe7, (byte) 0xd3, (byte) 0xbb, (byte) 0x6b, + (byte) 0xd6, (byte) 0xb1, (byte) 0x7f, (byte) 0xfe, (byte) 0xe1, + (byte) 0xdf, (byte) 0xa3, (byte) 0x5b, (byte) 0xb6, (byte) 0x71, + (byte) 0xe2, (byte) 0xd9, (byte) 0xaf, (byte) 0x43, (byte) 0x86, + (byte) 0x11, (byte) 0x22, (byte) 0x44, (byte) 0x88, (byte) 0x0d, + (byte) 0x1a, (byte) 0x34, (byte) 0x68, (byte) 0xd0, (byte) 0xbd, + (byte) 0x67, (byte) 0xce, (byte) 0x81, 
(byte) 0x1f, (byte) 0x3e, + (byte) 0x7c, (byte) 0xf8, (byte) 0xed, (byte) 0xc7, (byte) 0x93, + (byte) 0x3b, (byte) 0x76, (byte) 0xec, (byte) 0xc5, (byte) 0x97, + (byte) 0x33, (byte) 0x66, (byte) 0xcc, (byte) 0x85, (byte) 0x17, + (byte) 0x2e, (byte) 0x5c, (byte) 0xb8, (byte) 0x6d, (byte) 0xda, + (byte) 0xa9, (byte) 0x4f, (byte) 0x9e, (byte) 0x21, (byte) 0x42, + (byte) 0x84, (byte) 0x15, (byte) 0x2a, (byte) 0x54, (byte) 0xa8, + (byte) 0x4d, (byte) 0x9a, (byte) 0x29, (byte) 0x52, (byte) 0xa4, + (byte) 0x55, (byte) 0xaa, (byte) 0x49, (byte) 0x92, (byte) 0x39, + (byte) 0x72, (byte) 0xe4, (byte) 0xd5, (byte) 0xb7, (byte) 0x73, + (byte) 0xe6, (byte) 0xd1, (byte) 0xbf, (byte) 0x63, (byte) 0xc6, + (byte) 0x91, (byte) 0x3f, (byte) 0x7e, (byte) 0xfc, (byte) 0xe5, + (byte) 0xd7, (byte) 0xb3, (byte) 0x7b, (byte) 0xf6, (byte) 0xf1, + (byte) 0xff, (byte) 0xe3, (byte) 0xdb, (byte) 0xab, (byte) 0x4b, + (byte) 0x96, (byte) 0x31, (byte) 0x62, (byte) 0xc4, (byte) 0x95, + (byte) 0x37, (byte) 0x6e, (byte) 0xdc, (byte) 0xa5, (byte) 0x57, + (byte) 0xae, (byte) 0x41, (byte) 0x82, (byte) 0x19, (byte) 0x32, + (byte) 0x64, (byte) 0xc8, (byte) 0x8d, (byte) 0x07, (byte) 0x0e, + (byte) 0x1c, (byte) 0x38, (byte) 0x70, (byte) 0xe0, (byte) 0xdd, + (byte) 0xa7, (byte) 0x53, (byte) 0xa6, (byte) 0x51, (byte) 0xa2, + (byte) 0x59, (byte) 0xb2, (byte) 0x79, (byte) 0xf2, (byte) 0xf9, + (byte) 0xef, (byte) 0xc3, (byte) 0x9b, (byte) 0x2b, (byte) 0x56, + (byte) 0xac, (byte) 0x45, (byte) 0x8a, (byte) 0x09, (byte) 0x12, + (byte) 0x24, (byte) 0x48, (byte) 0x90, (byte) 0x3d, (byte) 0x7a, + (byte) 0xf4, (byte) 0xf5, (byte) 0xf7, (byte) 0xf3, (byte) 0xfb, + (byte) 0xeb, (byte) 0xcb, (byte) 0x8b, (byte) 0x0b, (byte) 0x16, + (byte) 0x2c, (byte) 0x58, (byte) 0xb0, (byte) 0x7d, (byte) 0xfa, + (byte) 0xe9, (byte) 0xcf, (byte) 0x83, (byte) 0x1b, (byte) 0x36, + (byte) 0x6c, (byte) 0xd8, (byte) 0xad, (byte) 0x47, (byte) 0x8e, + (byte) 0x01 + }; + + public static byte[] gfLogBase() { + return GF_LOG_BASE; + } + + private 
static final byte[] GF_LOG_BASE = new byte[] { + (byte) 0x00, (byte) 0xff, (byte) 0x01, (byte) 0x19, (byte) 0x02, + (byte) 0x32, (byte) 0x1a, (byte) 0xc6, (byte) 0x03, (byte) 0xdf, + (byte) 0x33, (byte) 0xee, (byte) 0x1b, (byte) 0x68, (byte) 0xc7, + (byte) 0x4b, (byte) 0x04, (byte) 0x64, (byte) 0xe0, (byte) 0x0e, + (byte) 0x34, (byte) 0x8d, (byte) 0xef, (byte) 0x81, (byte) 0x1c, + (byte) 0xc1, (byte) 0x69, (byte) 0xf8, (byte) 0xc8, (byte) 0x08, + (byte) 0x4c, (byte) 0x71, (byte) 0x05, (byte) 0x8a, (byte) 0x65, + (byte) 0x2f, (byte) 0xe1, (byte) 0x24, (byte) 0x0f, (byte) 0x21, + (byte) 0x35, (byte) 0x93, (byte) 0x8e, (byte) 0xda, (byte) 0xf0, + (byte) 0x12, (byte) 0x82, (byte) 0x45, (byte) 0x1d, (byte) 0xb5, + (byte) 0xc2, (byte) 0x7d, (byte) 0x6a, (byte) 0x27, (byte) 0xf9, + (byte) 0xb9, (byte) 0xc9, (byte) 0x9a, (byte) 0x09, (byte) 0x78, + (byte) 0x4d, (byte) 0xe4, (byte) 0x72, (byte) 0xa6, (byte) 0x06, + (byte) 0xbf, (byte) 0x8b, (byte) 0x62, (byte) 0x66, (byte) 0xdd, + (byte) 0x30, (byte) 0xfd, (byte) 0xe2, (byte) 0x98, (byte) 0x25, + (byte) 0xb3, (byte) 0x10, (byte) 0x91, (byte) 0x22, (byte) 0x88, + (byte) 0x36, (byte) 0xd0, (byte) 0x94, (byte) 0xce, (byte) 0x8f, + (byte) 0x96, (byte) 0xdb, (byte) 0xbd, (byte) 0xf1, (byte) 0xd2, + (byte) 0x13, (byte) 0x5c, (byte) 0x83, (byte) 0x38, (byte) 0x46, + (byte) 0x40, (byte) 0x1e, (byte) 0x42, (byte) 0xb6, (byte) 0xa3, + (byte) 0xc3, (byte) 0x48, (byte) 0x7e, (byte) 0x6e, (byte) 0x6b, + (byte) 0x3a, (byte) 0x28, (byte) 0x54, (byte) 0xfa, (byte) 0x85, + (byte) 0xba, (byte) 0x3d, (byte) 0xca, (byte) 0x5e, (byte) 0x9b, + (byte) 0x9f, (byte) 0x0a, (byte) 0x15, (byte) 0x79, (byte) 0x2b, + (byte) 0x4e, (byte) 0xd4, (byte) 0xe5, (byte) 0xac, (byte) 0x73, + (byte) 0xf3, (byte) 0xa7, (byte) 0x57, (byte) 0x07, (byte) 0x70, + (byte) 0xc0, (byte) 0xf7, (byte) 0x8c, (byte) 0x80, (byte) 0x63, + (byte) 0x0d, (byte) 0x67, (byte) 0x4a, (byte) 0xde, (byte) 0xed, + (byte) 0x31, (byte) 0xc5, (byte) 0xfe, (byte) 0x18, (byte) 0xe3, + (byte) 
0xa5, (byte) 0x99, (byte) 0x77, (byte) 0x26, (byte) 0xb8, + (byte) 0xb4, (byte) 0x7c, (byte) 0x11, (byte) 0x44, (byte) 0x92, + (byte) 0xd9, (byte) 0x23, (byte) 0x20, (byte) 0x89, (byte) 0x2e, + (byte) 0x37, (byte) 0x3f, (byte) 0xd1, (byte) 0x5b, (byte) 0x95, + (byte) 0xbc, (byte) 0xcf, (byte) 0xcd, (byte) 0x90, (byte) 0x87, + (byte) 0x97, (byte) 0xb2, (byte) 0xdc, (byte) 0xfc, (byte) 0xbe, + (byte) 0x61, (byte) 0xf2, (byte) 0x56, (byte) 0xd3, (byte) 0xab, + (byte) 0x14, (byte) 0x2a, (byte) 0x5d, (byte) 0x9e, (byte) 0x84, + (byte) 0x3c, (byte) 0x39, (byte) 0x53, (byte) 0x47, (byte) 0x6d, + (byte) 0x41, (byte) 0xa2, (byte) 0x1f, (byte) 0x2d, (byte) 0x43, + (byte) 0xd8, (byte) 0xb7, (byte) 0x7b, (byte) 0xa4, (byte) 0x76, + (byte) 0xc4, (byte) 0x17, (byte) 0x49, (byte) 0xec, (byte) 0x7f, + (byte) 0x0c, (byte) 0x6f, (byte) 0xf6, (byte) 0x6c, (byte) 0xa1, + (byte) 0x3b, (byte) 0x52, (byte) 0x29, (byte) 0x9d, (byte) 0x55, + (byte) 0xaa, (byte) 0xfb, (byte) 0x60, (byte) 0x86, (byte) 0xb1, + (byte) 0xbb, (byte) 0xcc, (byte) 0x3e, (byte) 0x5a, (byte) 0xcb, + (byte) 0x59, (byte) 0x5f, (byte) 0xb0, (byte) 0x9c, (byte) 0xa9, + (byte) 0xa0, (byte) 0x51, (byte) 0x0b, (byte) 0xf5, (byte) 0x16, + (byte) 0xeb, (byte) 0x7a, (byte) 0x75, (byte) 0x2c, (byte) 0xd7, + (byte) 0x4f, (byte) 0xae, (byte) 0xd5, (byte) 0xe9, (byte) 0xe6, + (byte) 0xe7, (byte) 0xad, (byte) 0xe8, (byte) 0x74, (byte) 0xd6, + (byte) 0xf4, (byte) 0xea, (byte) 0xa8, (byte) 0x50, (byte) 0x58, + (byte) 0xaf + }; + + private static byte[][] theGfMulTab; // multiply result table in GF 256 space + + /** + * Initialize the GF multiply table for performance. Just compute once, and + * avoid repeatedly doing the multiply during encoding/decoding. + */ + static { + theGfMulTab = new byte[256][256]; + for (int i = 0; i < 256; i++) { + for (int j = 0; j < 256; j++) { + theGfMulTab[i][j] = gfMul((byte) i, (byte) j); + } + } + } + + /** + * Get the big GF multiply table so utilize it efficiently. 
+ * @return the big GF multiply table + */ + public static byte[][] gfMulTab() { + return theGfMulTab; + } + + public static byte gfMul(byte a, byte b) { + if ((a == 0) || (b == 0)) { + return 0; + } + + int tmp = (GF_LOG_BASE[a & 0xff] & 0xff) + + (GF_LOG_BASE[b & 0xff] & 0xff); + if (tmp > 254) { + tmp -= 255; + } + + return GF_BASE[tmp]; + } + + public static byte gfInv(byte a) { + if (a == 0) { + return 0; + } + + return GF_BASE[255 - GF_LOG_BASE[a & 0xff] & 0xff]; + } + + /** + * Invert a matrix assuming it's invertible; inMatrix is overwritten. + * + * Ported from Intel ISA-L library. + */ + public static void gfInvertMatrix(byte[] inMatrix, byte[] outMatrix, int n) { + byte temp; + + // Set outMatrix[] to the identity matrix + for (int i = 0; i < n * n; i++) { + // memset(outMatrix, 0, n*n) + outMatrix[i] = 0; + } + + for (int i = 0; i < n; i++) { + outMatrix[i * n + i] = 1; + } + + // Inverse + for (int j, i = 0; i < n; i++) { + // Check for 0 in pivot element + if (inMatrix[i * n + i] == 0) { + // Find a row with non-zero in current column and swap + for (j = i + 1; j < n; j++) { + if (inMatrix[j * n + i] != 0) { + break; + } + } + if (j == n) { + // No such row was found, which means the matrix is singular + throw new RuntimeException("Not invertble"); + } + + for (int k = 0; k < n; k++) { + // Swap rows i,j + temp = inMatrix[i * n + k]; + inMatrix[i * n + k] = inMatrix[j * n + k]; + inMatrix[j * n + k] = temp; + + temp = outMatrix[i * n + k]; + outMatrix[i * n + k] = outMatrix[j * n + k]; + outMatrix[j * n + k] = temp; + } + } + + temp = gfInv(inMatrix[i * n + i]); // 1/pivot + for (j = 0; j < n; j++) { + // Scale row i by 1/pivot + inMatrix[i * n + j] = gfMul(inMatrix[i * n + j], temp); + outMatrix[i * n + j] = gfMul(outMatrix[i * n + j], temp); + } + + for (j = 0; j < n; j++) { + if (j == i) { + continue; + } + + temp = inMatrix[j * n + i]; + for (int k = 0; k < n; k++) { + outMatrix[j * n + k] ^= gfMul(temp, outMatrix[i * n + k]); + inMatrix[j * n + k] ^= gfMul(temp, inMatrix[i * n + k]); + } + } +
} + } + + /** + * Ported from Intel ISA-L library. + * + * Calculates const table gftbl in GF(2^8) from single input A + * gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20}, + * ... , A{f0} } -- from ISA-L implementation + */ + public static void gfVectMulInit(byte c, byte[] tbl, int offset) { + byte c2 = (byte) ((c << 1) ^ ((c & 0x80) != 0 ? 0x1d : 0)); + byte c4 = (byte) ((c2 << 1) ^ ((c2 & 0x80) != 0 ? 0x1d : 0)); + byte c8 = (byte) ((c4 << 1) ^ ((c4 & 0x80) != 0 ? 0x1d : 0)); + + byte c3, c5, c6, c7, c9, c10, c11, c12, c13, c14, c15; + byte c17, c18, c19, c20, c21, c22, c23, c24, c25, c26, + c27, c28, c29, c30, c31; + + c3 = (byte) (c2 ^ c); + c5 = (byte) (c4 ^ c); + c6 = (byte) (c4 ^ c2); + c7 = (byte) (c4 ^ c3); + + c9 = (byte) (c8 ^ c); + c10 = (byte) (c8 ^ c2); + c11 = (byte) (c8 ^ c3); + c12 = (byte) (c8 ^ c4); + c13 = (byte) (c8 ^ c5); + c14 = (byte) (c8 ^ c6); + c15 = (byte) (c8 ^ c7); + + tbl[offset + 0] = 0; + tbl[offset + 1] = c; + tbl[offset + 2] = c2; + tbl[offset + 3] = c3; + tbl[offset + 4] = c4; + tbl[offset + 5] = c5; + tbl[offset + 6] = c6; + tbl[offset + 7] = c7; + tbl[offset + 8] = c8; + tbl[offset + 9] = c9; + tbl[offset + 10] = c10; + tbl[offset + 11] = c11; + tbl[offset + 12] = c12; + tbl[offset + 13] = c13; + tbl[offset + 14] = c14; + tbl[offset + 15] = c15; + + c17 = (byte) ((c8 << 1) ^ ((c8 & 0x80) != 0 ? 0x1d : 0)); + c18 = (byte) ((c17 << 1) ^ ((c17 & 0x80) != 0 ? 0x1d : 0)); + c19 = (byte) (c18 ^ c17); + c20 = (byte) ((c18 << 1) ^ ((c18 & 0x80) != 0 ? 0x1d : 0)); + c21 = (byte) (c20 ^ c17); + c22 = (byte) (c20 ^ c18); + c23 = (byte) (c20 ^ c19); + c24 = (byte) ((c20 << 1) ^ ((c20 & 0x80) != 0 ? 
0x1d : 0)); + c25 = (byte) (c24 ^ c17); + c26 = (byte) (c24 ^ c18); + c27 = (byte) (c24 ^ c19); + c28 = (byte) (c24 ^ c20); + c29 = (byte) (c24 ^ c21); + c30 = (byte) (c24 ^ c22); + c31 = (byte) (c24 ^ c23); + + tbl[offset + 16] = 0; + tbl[offset + 17] = c17; + tbl[offset + 18] = c18; + tbl[offset + 19] = c19; + tbl[offset + 20] = c20; + tbl[offset + 21] = c21; + tbl[offset + 22] = c22; + tbl[offset + 23] = c23; + tbl[offset + 24] = c24; + tbl[offset + 25] = c25; + tbl[offset + 26] = c26; + tbl[offset + 27] = c27; + tbl[offset + 28] = c28; + tbl[offset + 29] = c29; + tbl[offset + 30] = c30; + tbl[offset + 31] = c31; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil.java index d6a10a4dc0..a3b0e39628 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil.java @@ -20,7 +20,7 @@ import org.apache.hadoop.classification.InterfaceAudience; /** - * Some utilities for Reed-Solomon coding. + * Utilities for implementing Reed-Solomon code, used by RS coder. */ @InterfaceAudience.Private public class RSUtil { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil2.java new file mode 100644 index 0000000000..84121a8380 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil2.java @@ -0,0 +1,172 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.erasurecode.rawcoder.util; + +import org.apache.hadoop.classification.InterfaceAudience; + +import java.nio.ByteBuffer; + +/** + * Utilities for implementing Reed-Solomon code, used by RS2 coder. Some of the + * code is borrowed from the ISA-L implementation (C or ASM code). + */ +@InterfaceAudience.Private +public final class RSUtil2 { + + private RSUtil2() { } + + public static void initTables(int k, int rows, byte[] codingMatrix, + int matrixOffset, byte[] gfTables) { + int i, j; + + int offset = 0, idx = matrixOffset; + for (i = 0; i < rows; i++) { + for (j = 0; j < k; j++) { + GF256.gfVectMulInit(codingMatrix[idx++], gfTables, offset); + offset += 32; + } + } + } + + /** + * Ported from Intel ISA-L library. + */ + public static void genCauchyMatrix(byte[] a, int m, int k) { + // Identity matrix in high position + for (int i = 0; i < k; i++) { + a[k * i + i] = 1; + } + + // For the rest choose 1/(i + j) | i != j + int pos = k * k; + for (int i = k; i < m; i++) { + for (int j = 0; j < k; j++) { + a[pos++] = GF256.gfInv((byte) (i ^ j)); + } + } + } + + /** + * Encode a group of input data and generate the outputs. It's also used for + * decoding because, in this implementation, encoding and decoding are + * unified.
+ * + * The algorithm is ported from Intel ISA-L library for compatibility. It + * leverages Java auto-vectorization support for performance. + */ + public static void encodeData(byte[] gfTables, int dataLen, byte[][] inputs, + int[] inputOffsets, byte[][] outputs, + int[] outputOffsets) { + int numInputs = inputs.length; + int numOutputs = outputs.length; + int l, i, j, iPos, oPos; + byte[] input, output; + byte s; + final int times = dataLen / 8; + final int extra = dataLen - dataLen % 8; + byte[] tableLine; + + for (l = 0; l < numOutputs; l++) { + output = outputs[l]; + + for (j = 0; j < numInputs; j++) { + input = inputs[j]; + iPos = inputOffsets[j]; + oPos = outputOffsets[l]; + + s = gfTables[j * 32 + l * numInputs * 32 + 1]; + tableLine = GF256.gfMulTab()[s & 0xff]; + + /** + * Purely for performance, assuming we can use 8 bytes in the SIMD + * instruction. Subject to improvement. + */ + for (i = 0; i < times; i++, iPos += 8, oPos += 8) { + output[oPos + 0] ^= tableLine[0xff & input[iPos + 0]]; + output[oPos + 1] ^= tableLine[0xff & input[iPos + 1]]; + output[oPos + 2] ^= tableLine[0xff & input[iPos + 2]]; + output[oPos + 3] ^= tableLine[0xff & input[iPos + 3]]; + output[oPos + 4] ^= tableLine[0xff & input[iPos + 4]]; + output[oPos + 5] ^= tableLine[0xff & input[iPos + 5]]; + output[oPos + 6] ^= tableLine[0xff & input[iPos + 6]]; + output[oPos + 7] ^= tableLine[0xff & input[iPos + 7]]; + } + + /** + * For the remaining bytes, process them one by one. + */ + for (i = extra; i < dataLen; i++, iPos++, oPos++) { + output[oPos] ^= tableLine[0xff & input[iPos]]; + } + } + } + } + + /** + * See above. Try to use the byte[] version when possible.
+ */ + public static void encodeData(byte[] gfTables, ByteBuffer[] inputs, + ByteBuffer[] outputs) { + int numInputs = inputs.length; + int numOutputs = outputs.length; + int dataLen = inputs[0].remaining(); + int l, i, j, iPos, oPos; + ByteBuffer input, output; + byte s; + final int times = dataLen / 8; + final int extra = dataLen - dataLen % 8; + byte[] tableLine; + + for (l = 0; l < numOutputs; l++) { + output = outputs[l]; + + for (j = 0; j < numInputs; j++) { + input = inputs[j]; + iPos = input.position(); + oPos = output.position(); + + s = gfTables[j * 32 + l * numInputs * 32 + 1]; + tableLine = GF256.gfMulTab()[s & 0xff]; + + for (i = 0; i < times; i++, iPos += 8, oPos += 8) { + output.put(oPos + 0, (byte) (output.get(oPos + 0) ^ + tableLine[0xff & input.get(iPos + 0)])); + output.put(oPos + 1, (byte) (output.get(oPos + 1) ^ + tableLine[0xff & input.get(iPos + 1)])); + output.put(oPos + 2, (byte) (output.get(oPos + 2) ^ + tableLine[0xff & input.get(iPos + 2)])); + output.put(oPos + 3, (byte) (output.get(oPos + 3) ^ + tableLine[0xff & input.get(iPos + 3)])); + output.put(oPos + 4, (byte) (output.get(oPos + 4) ^ + tableLine[0xff & input.get(iPos + 4)])); + output.put(oPos + 5, (byte) (output.get(oPos + 5) ^ + tableLine[0xff & input.get(iPos + 5)])); + output.put(oPos + 6, (byte) (output.get(oPos + 6) ^ + tableLine[0xff & input.get(iPos + 6)])); + output.put(oPos + 7, (byte) (output.get(oPos + 7) ^ + tableLine[0xff & input.get(iPos + 7)])); + } + + for (i = extra; i < dataLen; i++, iPos++, oPos++) { + output.put(oPos, (byte) (output.get(oPos) ^ + tableLine[0xff & input.get(iPos)])); + } + } + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder.java index 3e37e17ed8..26458f37b7 100644 --- 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder.java @@ -31,95 +31,4 @@ public void setup() { this.decoderClass = RSRawDecoder.class; setAllowDump(false); // Change to true to allow verbose dump for debugging } - - @Test - public void testCoding_6x3_erasing_all_d() { - prepare(null, 6, 3, new int[]{0, 1, 2}, new int[0], true); - testCodingDoMixAndTwice(); - } - - @Test - public void testCoding_6x3_erasing_d0_d2() { - prepare(null, 6, 3, new int[] {0, 2}, new int[]{}); - testCodingDoMixAndTwice(); - } - - @Test - public void testCoding_6x3_erasing_d0() { - prepare(null, 6, 3, new int[]{0}, new int[0]); - testCodingDoMixAndTwice(); - } - - @Test - public void testCoding_6x3_erasing_d2() { - prepare(null, 6, 3, new int[]{2}, new int[]{}); - testCodingDoMixAndTwice(); - } - - @Test - public void testCoding_6x3_erasing_d0_p0() { - prepare(null, 6, 3, new int[]{0}, new int[]{0}); - testCodingDoMixAndTwice(); - } - - @Test - public void testCoding_6x3_erasing_all_p() { - prepare(null, 6, 3, new int[0], new int[]{0, 1, 2}); - testCodingDoMixAndTwice(); - } - - @Test - public void testCoding_6x3_erasing_p0() { - prepare(null, 6, 3, new int[0], new int[]{0}); - testCodingDoMixAndTwice(); - } - - @Test - public void testCoding_6x3_erasing_p2() { - prepare(null, 6, 3, new int[0], new int[]{2}); - testCodingDoMixAndTwice(); - } - - @Test - public void testCoding_6x3_erasure_p0_p2() { - prepare(null, 6, 3, new int[0], new int[]{0, 2}); - testCodingDoMixAndTwice(); - } - - @Test - public void testCoding_6x3_erasing_d0_p0_p1() { - prepare(null, 6, 3, new int[]{0}, new int[]{0, 1}); - testCodingDoMixAndTwice(); - } - - @Test - public void testCoding_6x3_erasing_d0_d2_p2() { - prepare(null, 6, 3, new int[]{0, 2}, new int[]{2}); - testCodingDoMixAndTwice(); - } - - @Test - public void 
testCodingNegative_6x3_erasing_d2_d4() { - prepare(null, 6, 3, new int[]{2, 4}, new int[0]); - testCodingDoMixAndTwice(); - } - - @Test - public void testCodingNegative_6x3_erasing_too_many() { - prepare(null, 6, 3, new int[]{2, 4}, new int[]{0, 1}); - testCodingWithErasingTooMany(); - } - - @Test - public void testCoding_10x4_erasing_d0_p0() { - prepare(null, 10, 4, new int[] {0}, new int[] {0}); - testCodingDoMixAndTwice(); - } - - @Test - public void testCodingInputBufferPosition() { - prepare(null, 6, 3, new int[]{0}, new int[]{0}); - testInputPosition(false); - testInputPosition(true); - } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder2.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder2.java new file mode 100644 index 0000000000..3e11d143d1 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder2.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.erasurecode.rawcoder; + +import org.junit.Before; + +/** + * Test the new raw Reed-solomon coder implemented in Java. + */ +public class TestRSRawCoder2 extends TestRSRawCoderBase { + + @Before + public void setup() { + this.encoderClass = RSRawEncoder2.class; + this.decoderClass = RSRawDecoder2.class; + setAllowDump(false); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoderBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoderBase.java index efde33211a..b03b051dd0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoderBase.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoderBase.java @@ -17,42 +17,101 @@ */ package org.apache.hadoop.io.erasurecode.rawcoder; -import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil; +import org.junit.Test; /** * Test base for raw Reed-solomon coders. 
*/ public abstract class TestRSRawCoderBase extends TestRawCoderBase { - private static int symbolSize = 0; - private static int symbolMax = 0; - - private static int RS_FIXED_DATA_GENERATOR = 0; - - static { - symbolSize = (int) Math.round(Math.log( - RSUtil.GF.getFieldSize()) / Math.log(2)); - symbolMax = (int) Math.pow(2, symbolSize); + @Test + public void testCoding_6x3_erasing_all_d() { + prepare(null, 6, 3, new int[]{0, 1, 2}, new int[0], true); + testCodingDoMixAndTwice(); } - @Override - protected byte[] generateData(int len) { - byte[] buffer = new byte[len]; - for (int i = 0; i < buffer.length; i++) { - buffer[i] = (byte) RAND.nextInt(symbolMax); - } - return buffer; + @Test + public void testCoding_6x3_erasing_d0_d2() { + prepare(null, 6, 3, new int[] {0, 2}, new int[]{}); + testCodingDoMixAndTwice(); } - @Override - protected byte[] generateFixedData(int len) { - byte[] buffer = new byte[len]; - for (int i = 0; i < buffer.length; i++) { - buffer[i] = (byte) RS_FIXED_DATA_GENERATOR++; - if (RS_FIXED_DATA_GENERATOR == symbolMax) { - RS_FIXED_DATA_GENERATOR = 0; - } - } - return buffer; + @Test + public void testCoding_6x3_erasing_d0() { + prepare(null, 6, 3, new int[]{0}, new int[0]); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_d2() { + prepare(null, 6, 3, new int[]{2}, new int[]{}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_d0_p0() { + prepare(null, 6, 3, new int[]{0}, new int[]{0}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_all_p() { + prepare(null, 6, 3, new int[0], new int[]{0, 1, 2}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_p0() { + prepare(null, 6, 3, new int[0], new int[]{0}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_p2() { + prepare(null, 6, 3, new int[0], new int[]{2}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasure_p0_p2() { 
+ prepare(null, 6, 3, new int[0], new int[]{0, 2}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_d0_p0_p1() { + prepare(null, 6, 3, new int[]{0}, new int[]{0, 1}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCoding_6x3_erasing_d0_d2_p2() { + prepare(null, 6, 3, new int[]{0, 2}, new int[]{2}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCodingNegative_6x3_erasing_d2_d4() { + prepare(null, 6, 3, new int[]{2, 4}, new int[0]); + testCodingDoMixAndTwice(); + } + + @Test + public void testCodingNegative_6x3_erasing_too_many() { + prepare(null, 6, 3, new int[]{2, 4}, new int[]{0, 1}); + testCodingWithErasingTooMany(); + } + + @Test + public void testCoding_10x4_erasing_d0_p0() { + prepare(null, 10, 4, new int[] {0}, new int[] {0}); + testCodingDoMixAndTwice(); + } + + @Test + public void testCodingInputBufferPosition() { + prepare(null, 6, 3, new int[]{0}, new int[]{0}); + testInputPosition(false); + testInputPosition(true); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestXORRawCoder.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestXORRawCoder.java index 3b07b24c88..48463ad1fd 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestXORRawCoder.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestXORRawCoder.java @@ -29,7 +29,6 @@ public class TestXORRawCoder extends TestRawCoderBase { public void setup() { this.encoderClass = XORRawEncoder.class; this.decoderClass = XORRawDecoder.class; - setAllowDump(false); } @Test