HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code (Contributed by Kai Zheng)
This commit is contained in:
parent
aac73c21c3
commit
99502cbbe2
@ -37,3 +37,6 @@
|
|||||||
|
|
||||||
HADOOP-11805 Better to rename some raw erasure coders. Contributed by Kai Zheng
|
HADOOP-11805 Better to rename some raw erasure coders. Contributed by Kai Zheng
|
||||||
( Kai Zheng )
|
( Kai Zheng )
|
||||||
|
|
||||||
|
HADOOP-11645. Erasure Codec API covering the essential aspects for an erasure code
|
||||||
|
( Kai Zheng via vinayakumarb )
|
||||||
|
@ -79,4 +79,22 @@ public boolean anyErasedParityBlock() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get erased blocks count
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public int getErasedCount() {
|
||||||
|
int erasedCount = 0;
|
||||||
|
|
||||||
|
for (ECBlock dataBlock : dataBlocks) {
|
||||||
|
if (dataBlock.isErased()) erasedCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (ECBlock parityBlock : parityBlocks) {
|
||||||
|
if (parityBlock.isErased()) erasedCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return erasedCount;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,88 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.io.erasurecode.codec;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configured;
|
||||||
|
import org.apache.hadoop.io.erasurecode.ECSchema;
|
||||||
|
import org.apache.hadoop.io.erasurecode.coder.*;
|
||||||
|
import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Abstract Erasure Codec that implements {@link ErasureCodec}.
|
||||||
|
*/
|
||||||
|
public abstract class AbstractErasureCodec extends Configured
|
||||||
|
implements ErasureCodec {
|
||||||
|
|
||||||
|
private ECSchema schema;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setSchema(ECSchema schema) {
|
||||||
|
this.schema = schema;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return schema.getCodecName();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected ECSchema getSchema() {
|
||||||
|
return schema;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BlockGrouper createBlockGrouper() {
|
||||||
|
BlockGrouper blockGrouper = new BlockGrouper();
|
||||||
|
blockGrouper.setSchema(getSchema());
|
||||||
|
|
||||||
|
return blockGrouper;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ErasureCoder createEncoder() {
|
||||||
|
ErasureCoder encoder = doCreateEncoder();
|
||||||
|
prepareErasureCoder(encoder);
|
||||||
|
return encoder;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new encoder instance to be initialized afterwards.
|
||||||
|
* @return encoder
|
||||||
|
*/
|
||||||
|
protected abstract ErasureCoder doCreateEncoder();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ErasureCoder createDecoder() {
|
||||||
|
ErasureCoder decoder = doCreateDecoder();
|
||||||
|
prepareErasureCoder(decoder);
|
||||||
|
return decoder;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new decoder instance to be initialized afterwards.
|
||||||
|
* @return decoder
|
||||||
|
*/
|
||||||
|
protected abstract ErasureCoder doCreateDecoder();
|
||||||
|
|
||||||
|
private void prepareErasureCoder(ErasureCoder erasureCoder) {
|
||||||
|
if (getSchema() == null) {
|
||||||
|
throw new RuntimeException("No schema been set yet");
|
||||||
|
}
|
||||||
|
|
||||||
|
erasureCoder.setConf(getConf());
|
||||||
|
erasureCoder.initialize(getSchema());
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,56 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.io.erasurecode.codec;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configurable;
|
||||||
|
import org.apache.hadoop.io.erasurecode.ECSchema;
|
||||||
|
import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
|
||||||
|
import org.apache.hadoop.io.erasurecode.grouper.BlockGrouper;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Erasure Codec API that's to cover the essential specific aspects of a code.
|
||||||
|
* Currently it cares only block grouper and erasure coder. In future we may
|
||||||
|
* add more aspects here to make the behaviors customizable.
|
||||||
|
*/
|
||||||
|
public interface ErasureCodec extends Configurable {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set EC schema to be used by this codec.
|
||||||
|
* @param schema
|
||||||
|
*/
|
||||||
|
public void setSchema(ECSchema schema);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create block grouper
|
||||||
|
* @return block grouper
|
||||||
|
*/
|
||||||
|
public BlockGrouper createBlockGrouper();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create Erasure Encoder
|
||||||
|
* @return erasure encoder
|
||||||
|
*/
|
||||||
|
public ErasureCoder createEncoder();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create Erasure Decoder
|
||||||
|
* @return erasure decoder
|
||||||
|
*/
|
||||||
|
public ErasureCoder createDecoder();
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,38 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.io.erasurecode.codec;
|
||||||
|
|
||||||
|
import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
|
||||||
|
import org.apache.hadoop.io.erasurecode.coder.RSErasureDecoder;
|
||||||
|
import org.apache.hadoop.io.erasurecode.coder.RSErasureEncoder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A Reed-Solomon erasure codec.
|
||||||
|
*/
|
||||||
|
public class RSErasureCodec extends AbstractErasureCodec {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ErasureCoder doCreateEncoder() {
|
||||||
|
return new RSErasureEncoder();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ErasureCoder doCreateDecoder() {
|
||||||
|
return new RSErasureDecoder();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,45 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.io.erasurecode.codec;
|
||||||
|
|
||||||
|
import org.apache.hadoop.io.erasurecode.ECSchema;
|
||||||
|
import org.apache.hadoop.io.erasurecode.coder.ErasureCoder;
|
||||||
|
import org.apache.hadoop.io.erasurecode.coder.XORErasureDecoder;
|
||||||
|
import org.apache.hadoop.io.erasurecode.coder.XORErasureEncoder;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A XOR erasure codec.
|
||||||
|
*/
|
||||||
|
public class XORErasureCodec extends AbstractErasureCodec {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setSchema(ECSchema schema) {
|
||||||
|
super.setSchema(schema);
|
||||||
|
assert(schema.getNumParityUnits() == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ErasureCoder doCreateEncoder() {
|
||||||
|
return new XORErasureEncoder();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ErasureCoder doCreateDecoder() {
|
||||||
|
return new XORErasureDecoder();
|
||||||
|
}
|
||||||
|
}
|
@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.conf.Configured;
|
import org.apache.hadoop.conf.Configured;
|
||||||
|
import org.apache.hadoop.io.erasurecode.ECSchema;
|
||||||
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoder;
|
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoder;
|
||||||
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoderFactory;
|
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureCoderFactory;
|
||||||
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
|
import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder;
|
||||||
@ -104,6 +105,12 @@ public void initialize(int numDataUnits, int numParityUnits,
|
|||||||
this.chunkSize = chunkSize;
|
this.chunkSize = chunkSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initialize(ECSchema schema) {
|
||||||
|
initialize(schema.getNumDataUnits(), schema.getNumParityUnits(),
|
||||||
|
schema.getChunkSize());
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getNumDataUnits() {
|
public int getNumDataUnits() {
|
||||||
return numDataUnits;
|
return numDataUnits;
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
import org.apache.hadoop.conf.Configurable;
|
import org.apache.hadoop.conf.Configurable;
|
||||||
import org.apache.hadoop.io.erasurecode.ECBlockGroup;
|
import org.apache.hadoop.io.erasurecode.ECBlockGroup;
|
||||||
|
import org.apache.hadoop.io.erasurecode.ECSchema;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An erasure coder to perform encoding or decoding given a group. Generally it
|
* An erasure coder to perform encoding or decoding given a group. Generally it
|
||||||
@ -44,6 +45,12 @@ public interface ErasureCoder extends Configurable {
|
|||||||
*/
|
*/
|
||||||
public void initialize(int numDataUnits, int numParityUnits, int chunkSize);
|
public void initialize(int numDataUnits, int numParityUnits, int chunkSize);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize with an EC schema.
|
||||||
|
* @param schema
|
||||||
|
*/
|
||||||
|
public void initialize(ECSchema schema);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The number of data input units for the coding. A unit can be a byte,
|
* The number of data input units for the coding. A unit can be a byte,
|
||||||
* chunk or buffer or even a block.
|
* chunk or buffer or even a block.
|
||||||
|
@ -0,0 +1,90 @@
|
|||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.io.erasurecode.grouper;
|
||||||
|
|
||||||
|
import org.apache.hadoop.io.erasurecode.ECBlock;
|
||||||
|
import org.apache.hadoop.io.erasurecode.ECBlockGroup;
|
||||||
|
import org.apache.hadoop.io.erasurecode.ECSchema;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* As part of a codec, to handle how to form a block group for encoding
|
||||||
|
* and provide instructions on how to recover erased blocks from a block group
|
||||||
|
*/
|
||||||
|
public class BlockGrouper {
|
||||||
|
|
||||||
|
private ECSchema schema;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set EC schema.
|
||||||
|
* @param schema
|
||||||
|
*/
|
||||||
|
public void setSchema(ECSchema schema) {
|
||||||
|
this.schema = schema;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get EC schema.
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected ECSchema getSchema() {
|
||||||
|
return schema;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get required data blocks count in a BlockGroup.
|
||||||
|
* @return count of required data blocks
|
||||||
|
*/
|
||||||
|
public int getRequiredNumDataBlocks() {
|
||||||
|
return schema.getNumDataUnits();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get required parity blocks count in a BlockGroup.
|
||||||
|
* @return count of required parity blocks
|
||||||
|
*/
|
||||||
|
public int getRequiredNumParityBlocks() {
|
||||||
|
return schema.getNumParityUnits();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculating and organizing BlockGroup, to be called by ECManager
|
||||||
|
* @param dataBlocks Data blocks to compute parity blocks against
|
||||||
|
* @param parityBlocks To be computed parity blocks
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public ECBlockGroup makeBlockGroup(ECBlock[] dataBlocks,
|
||||||
|
ECBlock[] parityBlocks) {
|
||||||
|
|
||||||
|
ECBlockGroup blockGroup = new ECBlockGroup(dataBlocks, parityBlocks);
|
||||||
|
return blockGroup;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a BlockGroup, tell if any of the missing blocks can be recovered,
|
||||||
|
* to be called by ECManager
|
||||||
|
* @param blockGroup a blockGroup that may contain erased blocks but not sure
|
||||||
|
* recoverable or not
|
||||||
|
* @return true if any erased block recoverable, false otherwise
|
||||||
|
*/
|
||||||
|
public boolean anyRecoverable(ECBlockGroup blockGroup) {
|
||||||
|
int erasedCount = blockGroup.getErasedCount();
|
||||||
|
|
||||||
|
return erasedCount > 0 && erasedCount <= getRequiredNumParityBlocks();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user