HADOOP-13200. Implement customizable and configurable erasure coders. Contributed by Tim Yao.

This commit is contained in:
Wei-Chiu Chuang 2017-04-27 11:59:33 -07:00
parent d8a3309830
commit 872088c6e7
19 changed files with 138 additions and 76 deletions

View File

@ -18,8 +18,6 @@
package org.apache.hadoop.io.erasurecode;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
@ -30,18 +28,12 @@
import org.apache.hadoop.io.erasurecode.codec.XORErasureCodec;
import org.apache.hadoop.io.erasurecode.coder.ErasureDecoder;
import org.apache.hadoop.io.erasurecode.coder.ErasureEncoder;
import org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawErasureCoderFactory;
import org.apache.hadoop.io.erasurecode.rawcoder.NativeXORRawErasureCoderFactory;
import org.apache.hadoop.io.erasurecode.rawcoder.RSRawErasureCoderFactory;
import org.apache.hadoop.io.erasurecode.rawcoder.RSLegacyRawErasureCoderFactory;
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoderFactory;
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureEncoder;
import org.apache.hadoop.io.erasurecode.rawcoder.XORRawErasureCoderFactory;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.Map;
/**
* A codec & coder utility to help create coders conveniently.
@ -79,27 +71,12 @@ public final class CodecUtil {
/** Comma separated raw codec name. The first coder is prior to the latter. */
public static final String IO_ERASURECODE_CODEC_RS_LEGACY_RAWCODERS_KEY =
IO_ERASURECODE_CODEC + "rs-legacy.rawcoders";
public static final String IO_ERASURECODE_CODEC_RS_LEGACY_RAWCODERS_DEFAULT =
RSLegacyRawErasureCoderFactory.class.getCanonicalName();
public static final String IO_ERASURECODE_CODEC_RS_RAWCODERS_KEY =
IO_ERASURECODE_CODEC + "rs.rawcoders";
public static final String IO_ERASURECODE_CODEC_RS_RAWCODERS_DEFAULT =
NativeRSRawErasureCoderFactory.class.getCanonicalName() +
"," + RSRawErasureCoderFactory.class.getCanonicalName();
/** Raw coder factory for the XOR codec. */
public static final String IO_ERASURECODE_CODEC_XOR_RAWCODERS_KEY =
IO_ERASURECODE_CODEC + "xor.rawcoders";
public static final String IO_ERASURECODE_CODEC_XOR_RAWCODERS_DEFAULT =
NativeXORRawErasureCoderFactory.class.getCanonicalName() +
"," + XORRawErasureCoderFactory.class.getCanonicalName();
// Default coders for each codec names.
public static final Map<String, String> DEFAULT_CODERS_MAP = ImmutableMap.of(
"rs", IO_ERASURECODE_CODEC_RS_RAWCODERS_DEFAULT,
"rs-legacy", IO_ERASURECODE_CODEC_RS_LEGACY_RAWCODERS_DEFAULT,
"xor", IO_ERASURECODE_CODEC_XOR_RAWCODERS_DEFAULT
);
private CodecUtil() { }
@ -168,70 +145,61 @@ public static RawErasureDecoder createRawDecoder(
}
private static RawErasureCoderFactory createRawCoderFactory(
Configuration conf, String rawCoderFactoryKey) {
String coderName, String codecName) {
RawErasureCoderFactory fact;
try {
Class<? extends RawErasureCoderFactory> factClass = conf.getClassByName(
rawCoderFactoryKey).asSubclass(RawErasureCoderFactory.class);
fact = factClass.newInstance();
} catch (ClassNotFoundException | InstantiationException |
IllegalAccessException e) {
throw new RuntimeException("Failed to create raw coder factory", e);
}
if (fact == null) {
throw new RuntimeException("Failed to create raw coder factory");
}
fact = CodecRegistry.getInstance().
getCoderByName(codecName, coderName);
return fact;
}
// Return comma separated coder names
private static String getRawCoders(Configuration conf, String codec) {
return conf.get(
IO_ERASURECODE_CODEC + codec + ".rawcoders",
DEFAULT_CODERS_MAP.getOrDefault(codec, codec)
// Return a list of coder names
private static String[] getRawCoderNames(
Configuration conf, String codecName) {
return conf.getStrings(
IO_ERASURECODE_CODEC + codecName + ".rawcoders",
CodecRegistry.getInstance().getCoderNames(codecName)
);
}
private static RawErasureEncoder createRawEncoderWithFallback(
Configuration conf, String codec, ErasureCoderOptions coderOptions) {
String coders = getRawCoders(conf, codec);
for (String factName : Splitter.on(",").split(coders)) {
Configuration conf, String codecName, ErasureCoderOptions coderOptions) {
String[] rawCoderNames = getRawCoderNames(conf, codecName);
for (String rawCoderName : rawCoderNames) {
try {
if (factName != null) {
RawErasureCoderFactory fact = createRawCoderFactory(conf,
factName);
if (rawCoderName != null) {
RawErasureCoderFactory fact = createRawCoderFactory(
rawCoderName, codecName);
return fact.createEncoder(coderOptions);
}
} catch (LinkageError | Exception e) {
// Fallback to next coder if possible
LOG.warn("Failed to create raw erasure encoder " + factName +
LOG.warn("Failed to create raw erasure encoder " + rawCoderName +
", fallback to next codec if possible", e);
}
}
throw new IllegalArgumentException("Fail to create raw erasure " +
"encoder with given codec: " + codec);
"encoder with given codec: " + codecName);
}
private static RawErasureDecoder createRawDecoderWithFallback(
Configuration conf, String codec, ErasureCoderOptions coderOptions) {
String coders = getRawCoders(conf, codec);
for (String factName : Splitter.on(",").split(coders)) {
Configuration conf, String codecName, ErasureCoderOptions coderOptions) {
String[] coders = getRawCoderNames(conf, codecName);
for (String rawCoderName : coders) {
try {
if (factName != null) {
RawErasureCoderFactory fact = createRawCoderFactory(conf,
factName);
if (rawCoderName != null) {
RawErasureCoderFactory fact = createRawCoderFactory(
rawCoderName, codecName);
return fact.createDecoder(coderOptions);
}
} catch (LinkageError | Exception e) {
// Fallback to next coder if possible
LOG.warn("Failed to create raw erasure decoder " + factName +
LOG.warn("Failed to create raw erasure decoder " + rawCoderName +
", fallback to next codec if possible", e);
}
}
throw new IllegalArgumentException("Fail to create raw erasure " +
"encoder with given codec: " + codec);
"encoder with given codec: " + codecName);
}
private static ErasureCodec createCodec(Configuration conf,

View File

@ -25,6 +25,7 @@ public final class ErasureCodeConstants {
private ErasureCodeConstants() {
}
public static final String DUMMY_CODEC_NAME = "dummy";
public static final String RS_CODEC_NAME = "rs";
public static final String RS_LEGACY_CODEC_NAME = "rs-legacy";
public static final String XOR_CODEC_NAME = "xor";

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.io.erasurecode.rawcoder;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.io.erasurecode.ErasureCodeConstants;
import org.apache.hadoop.io.erasurecode.ErasureCoderOptions;
/**
@ -25,6 +26,7 @@
*/
@InterfaceAudience.Private
public class DummyRawErasureCoderFactory implements RawErasureCoderFactory {
public static final String CODER_NAME = "dummy_dummy";
@Override
public RawErasureEncoder createEncoder(ErasureCoderOptions coderOptions) {
@ -35,4 +37,14 @@ public RawErasureEncoder createEncoder(ErasureCoderOptions coderOptions) {
public RawErasureDecoder createDecoder(ErasureCoderOptions coderOptions) {
return new DummyRawDecoder(coderOptions);
}
@Override
public String getCoderName() {
return CODER_NAME;
}
@Override
public String getCodecName() {
return ErasureCodeConstants.DUMMY_CODEC_NAME;
}
}

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.io.erasurecode.rawcoder;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.io.erasurecode.ErasureCodeConstants;
import org.apache.hadoop.io.erasurecode.ErasureCoderOptions;
/**
@ -27,6 +28,8 @@
@InterfaceAudience.Private
public class NativeRSRawErasureCoderFactory implements RawErasureCoderFactory {
public static final String CODER_NAME = "rs_native";
@Override
public RawErasureEncoder createEncoder(ErasureCoderOptions coderOptions) {
return new NativeRSRawEncoder(coderOptions);
@ -36,4 +39,14 @@ public RawErasureEncoder createEncoder(ErasureCoderOptions coderOptions) {
public RawErasureDecoder createDecoder(ErasureCoderOptions coderOptions) {
return new NativeRSRawDecoder(coderOptions);
}
@Override
public String getCoderName() {
return CODER_NAME;
}
@Override
public String getCodecName() {
return ErasureCodeConstants.RS_CODEC_NAME;
}
}

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.io.erasurecode.rawcoder;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.io.erasurecode.ErasureCodeConstants;
import org.apache.hadoop.io.erasurecode.ErasureCoderOptions;
/**
@ -27,6 +28,8 @@
@InterfaceAudience.Private
public class NativeXORRawErasureCoderFactory implements RawErasureCoderFactory {
public static final String CODER_NAME = "xor_native";
@Override
public RawErasureEncoder createEncoder(ErasureCoderOptions coderOptions) {
return new NativeXORRawEncoder(coderOptions);
@ -36,4 +39,14 @@ public RawErasureEncoder createEncoder(ErasureCoderOptions coderOptions) {
public RawErasureDecoder createDecoder(ErasureCoderOptions coderOptions) {
return new NativeXORRawDecoder(coderOptions);
}
@Override
public String getCoderName() {
return CODER_NAME;
}
@Override
public String getCodecName() {
return ErasureCodeConstants.XOR_CODEC_NAME;
}
}

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.io.erasurecode.rawcoder;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.io.erasurecode.ErasureCodeConstants;
import org.apache.hadoop.io.erasurecode.ErasureCoderOptions;
/**
@ -26,6 +27,8 @@
@InterfaceAudience.Private
public class RSLegacyRawErasureCoderFactory implements RawErasureCoderFactory {
public static final String CODER_NAME = "rs-legacy_java";
@Override
public RawErasureEncoder createEncoder(ErasureCoderOptions coderOptions) {
return new RSLegacyRawEncoder(coderOptions);
@ -35,4 +38,14 @@ public RawErasureEncoder createEncoder(ErasureCoderOptions coderOptions) {
public RawErasureDecoder createDecoder(ErasureCoderOptions coderOptions) {
return new RSLegacyRawDecoder(coderOptions);
}
@Override
public String getCoderName() {
return CODER_NAME;
}
@Override
public String getCodecName() {
return ErasureCodeConstants.RS_LEGACY_CODEC_NAME;
}
}

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.io.erasurecode.rawcoder;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.io.erasurecode.ErasureCodeConstants;
import org.apache.hadoop.io.erasurecode.ErasureCoderOptions;
/**
@ -26,6 +27,8 @@
@InterfaceAudience.Private
public class RSRawErasureCoderFactory implements RawErasureCoderFactory {
public static final String CODER_NAME = "rs_java";
@Override
public RawErasureEncoder createEncoder(ErasureCoderOptions coderOptions) {
return new RSRawEncoder(coderOptions);
@ -35,4 +38,14 @@ public RawErasureEncoder createEncoder(ErasureCoderOptions coderOptions) {
public RawErasureDecoder createDecoder(ErasureCoderOptions coderOptions) {
return new RSRawDecoder(coderOptions);
}
@Override
public String getCoderName() {
return CODER_NAME;
}
@Override
public String getCodecName() {
return ErasureCodeConstants.RS_CODEC_NAME;
}
}

View File

@ -41,4 +41,16 @@ public interface RawErasureCoderFactory {
* @return raw erasure decoder
*/
RawErasureDecoder createDecoder(ErasureCoderOptions coderOptions);
/**
* Get the name of the coder.
* @return coder name
*/
String getCoderName();
/**
* Get the name of its codec.
* @return codec name
*/
String getCodecName();
}

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.io.erasurecode.rawcoder;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.io.erasurecode.ErasureCodeConstants;
import org.apache.hadoop.io.erasurecode.ErasureCoderOptions;
/**
@ -26,6 +27,8 @@
@InterfaceAudience.Private
public class XORRawErasureCoderFactory implements RawErasureCoderFactory {
public static final String CODER_NAME = "xor_java";
@Override
public RawErasureEncoder createEncoder(ErasureCoderOptions coderOptions) {
return new XORRawEncoder(coderOptions);
@ -35,4 +38,14 @@ public RawErasureEncoder createEncoder(ErasureCoderOptions coderOptions) {
public RawErasureDecoder createDecoder(ErasureCoderOptions coderOptions) {
return new XORRawDecoder(coderOptions);
}
@Override
public String getCoderName() {
return CODER_NAME;
}
@Override
public String getCodecName() {
return ErasureCodeConstants.XOR_CODEC_NAME;
}
}

View File

@ -668,7 +668,7 @@
<property>
<name>io.erasurecode.codec.rs.rawcoders</name>
<value>org.apache.hadoop.io.erasurecode.rawcoder.NativeRSRawErasureCoderFactory,org.apache.hadoop.io.erasurecode.rawcoder.RSRawErasureCoderFactory</value>
<value>rs_native,rs_java</value>
<description>
Comma separated raw coder implementations for the rs codec. The earlier
factory is prior to followings in case of failure of creating raw coders.
@ -677,7 +677,7 @@
<property>
<name>io.erasurecode.codec.rs-legacy.rawcoders</name>
<value>org.apache.hadoop.io.erasurecode.rawcoder.RSLegacyRawErasureCoderFactory</value>
<value>rs-legacy_java</value>
<description>
Comma separated raw coder implementations for the rs-legacy codec. The earlier
factory is prior to followings in case of failure of creating raw coders.
@ -686,7 +686,7 @@
<property>
<name>io.erasurecode.codec.xor.rawcoders</name>
<value>org.apache.hadoop.io.erasurecode.rawcoder.NativeXORRawErasureCoderFactory,org.apache.hadoop.io.erasurecode.rawcoder.XORRawErasureCoderFactory</value>
<value>xor_native,xor_java</value>
<description>
Comma separated raw coder implementations for the xor codec. The earlier
factory is prior to followings in case of failure of creating raw coders.

View File

@ -28,6 +28,7 @@
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureEncoder;
import org.apache.hadoop.io.erasurecode.rawcoder.XORRawDecoder;
import org.apache.hadoop.io.erasurecode.rawcoder.XORRawEncoder;
import org.apache.hadoop.io.erasurecode.rawcoder.XORRawErasureCoderFactory;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.Assert;
import org.junit.Before;
@ -104,8 +105,8 @@ public void testFallbackCoders() {
ErasureCoderOptions coderOptions = new ErasureCoderOptions(
numDataUnit, numParityUnit);
conf.set(CodecUtil.IO_ERASURECODE_CODEC_RS_RAWCODERS_KEY,
RSRawErasureCoderFactory.class.getCanonicalName() +
"," + NativeRSRawErasureCoderFactory.class.getCanonicalName());
RSRawErasureCoderFactory.CODER_NAME +
"," + NativeRSRawErasureCoderFactory.CODER_NAME);
// should return default raw coder of rs codec
RawErasureEncoder encoder = CodecUtil.createRawEncoder(
conf, ErasureCodeConstants.RS_CODEC_NAME, coderOptions);
@ -133,8 +134,7 @@ public void testIgnoreInvalidCodec() {
ErasureCoderOptions coderOptions = new ErasureCoderOptions(
numDataUnit, numParityUnit);
conf.set(CodecUtil.IO_ERASURECODE_CODEC_XOR_RAWCODERS_KEY,
"invalid-codec," +
"org.apache.hadoop.io.erasurecode.rawcoder.XORRawErasureCoderFactory");
"invalid-codec," + XORRawErasureCoderFactory.CODER_NAME);
// should return second coder specified by IO_ERASURECODE_CODEC_CODERS
RawErasureEncoder encoder = CodecUtil.createRawEncoder(
conf, ErasureCodeConstants.XOR_CODEC_NAME, coderOptions);

View File

@ -51,7 +51,7 @@ public void testCodingDirectBufferWithConf_10x4_erasing_d0() {
*/
Configuration conf = new Configuration();
conf.set(CodecUtil.IO_ERASURECODE_CODEC_RS_RAWCODERS_KEY,
RSRawErasureCoderFactory.class.getCanonicalName());
RSRawErasureCoderFactory.CODER_NAME);
prepare(conf, 10, 4, new int[]{0}, new int[0]);
testCoding(true);

View File

@ -58,7 +58,7 @@ public void testCodingDirectBufferWithConf_10x4_erasing_d0() {
*/
Configuration conf = new Configuration();
conf.set(CodecUtil.IO_ERASURECODE_CODEC_RS_RAWCODERS_KEY,
RSRawErasureCoderFactory.class.getCanonicalName());
RSRawErasureCoderFactory.CODER_NAME);
prepare(conf, 10, 4, new int[]{0}, new int[0]);
testCoding(true);

View File

@ -117,11 +117,15 @@ Deployment
be more appropriate. If the administrator only cares about node-level fault-tolerance, `RS-10-4-64k` would still be appropriate as long as
there are at least 14 DataNodes in the cluster.
The codec implementation for Reed-Solomon and XOR can be configured with the following client and DataNode configuration keys:
`io.erasurecode.codec.rs.rawcoder` for the default RS codec,
`io.erasurecode.codec.rs-legacy.rawcoder` for the legacy RS codec,
`io.erasurecode.codec.xor.rawcoder` for the XOR codec.
The default implementations for all of these codecs are pure Java. For default RS codec, there is also a native implementation which leverages Intel ISA-L library to improve the performance of codec. For XOR codec, a native implementation which leverages Intel ISA-L library to improve the performance of codec is also supported. Please refer to section "Enable Intel ISA-L" for more detail information.
The codec implementations for Reed-Solomon and XOR can be configured with the following client and DataNode configuration keys:
`io.erasurecode.codec.rs.rawcoders` for the default RS codec,
`io.erasurecode.codec.rs-legacy.rawcoders` for the legacy RS codec,
`io.erasurecode.codec.xor.rawcoders` for the XOR codec.
User can also configure self-defined codec with configuration key like:
`io.erasurecode.codec.self-defined-codec.rawcoders`.
The values for these key are lists of coder names with a fall-back mechanism.
All these codecs have implementations in pure Java. For default RS codec, there is also a native implementation which leverages Intel ISA-L library to improve the performance of codec. For XOR codec, a native implementation which leverages Intel ISA-L library to improve the performance of codec is also supported. Please refer to section "Enable Intel ISA-L" for more detail information.
The default implementation for RS Legacy is pure Java, and the default implementations for default RS and XOR are native implementations using Intel ISA-L library.
Erasure coding background recovery work on the DataNodes can also be tuned via the following configuration parameters:

View File

@ -98,7 +98,7 @@ public void setup() throws IOException {
if (ErasureCodeNative.isNativeCodeLoaded()) {
conf.set(
CodecUtil.IO_ERASURECODE_CODEC_RS_RAWCODERS_KEY,
NativeRSRawErasureCoderFactory.class.getCanonicalName());
NativeRSRawErasureCoderFactory.CODER_NAME);
}
SimulatedFSDataset.setFactory(conf);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(

View File

@ -85,7 +85,7 @@ public void setup() throws IOException {
if (ErasureCodeNative.isNativeCodeLoaded()) {
conf.set(
CodecUtil.IO_ERASURECODE_CODEC_RS_RAWCODERS_KEY,
NativeRSRawErasureCoderFactory.class.getCanonicalName());
NativeRSRawErasureCoderFactory.CODER_NAME);
}
DFSTestUtil.enableAllECPolicies(conf);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build();

View File

@ -214,7 +214,7 @@ private void setup(Configuration conf) throws IOException {
if (ErasureCodeNative.isNativeCodeLoaded()) {
conf.set(
CodecUtil.IO_ERASURECODE_CODEC_RS_RAWCODERS_KEY,
NativeRSRawErasureCoderFactory.class.getCanonicalName());
NativeRSRawErasureCoderFactory.CODER_NAME);
}
DFSTestUtil.enableAllECPolicies(conf);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build();

View File

@ -100,7 +100,7 @@ public void setup() throws IOException {
if (ErasureCodeNative.isNativeCodeLoaded()) {
conf.set(
CodecUtil.IO_ERASURECODE_CODEC_RS_RAWCODERS_KEY,
NativeRSRawErasureCoderFactory.class.getCanonicalName());
NativeRSRawErasureCoderFactory.CODER_NAME);
}
conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
StripedFileTestUtil.getDefaultECPolicy().getName());

View File

@ -68,7 +68,7 @@ public void setup() throws IOException {
if (ErasureCodeNative.isNativeCodeLoaded()) {
conf.set(
CodecUtil.IO_ERASURECODE_CODEC_RS_RAWCODERS_KEY,
NativeRSRawErasureCoderFactory.class.getCanonicalName());
NativeRSRawErasureCoderFactory.CODER_NAME);
}
DFSTestUtil.enableAllECPolicies(conf);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(