From 1a37b0fe9aaf30e775de63c538aeac82fe378400 Mon Sep 17 00:00:00 2001 From: Kai Zheng Date: Thu, 9 Apr 2015 01:30:02 +0800 Subject: [PATCH] HDFS-8074 Define a system-wide default EC schema. Contributed by Kai Zheng --- .../src/main/conf/ecschema-def.xml | 5 -- .../hadoop/io/erasurecode/ECSchema.java | 57 ++++++++++++++++- .../hadoop-hdfs/CHANGES-HDFS-EC-7285.txt | 4 +- .../hdfs/server/namenode/ECSchemaManager.java | 62 +++++++++++++++++++ 4 files changed, 120 insertions(+), 8 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ECSchemaManager.java diff --git a/hadoop-common-project/hadoop-common/src/main/conf/ecschema-def.xml b/hadoop-common-project/hadoop-common/src/main/conf/ecschema-def.xml index e6194857ef..e36d38650e 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/ecschema-def.xml +++ b/hadoop-common-project/hadoop-common/src/main/conf/ecschema-def.xml @@ -27,11 +27,6 @@ You can modify and remove those not used yet, or add new ones. --> - - 6 - 3 - RS - 10 4 diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECSchema.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECSchema.java index 27be00e673..8c3310e406 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECSchema.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ECSchema.java @@ -23,12 +23,12 @@ /** * Erasure coding schema to housekeeper relevant information. 
*/ -public class ECSchema { +public final class ECSchema { public static final String NUM_DATA_UNITS_KEY = "k"; public static final String NUM_PARITY_UNITS_KEY = "m"; public static final String CODEC_NAME_KEY = "codec"; public static final String CHUNK_SIZE_KEY = "chunkSize"; - public static final int DEFAULT_CHUNK_SIZE = 64 * 1024; // 64K + public static final int DEFAULT_CHUNK_SIZE = 256 * 1024; // 256K private String schemaName; private String codecName; @@ -81,6 +81,18 @@ public ECSchema(String schemaName, Map options) { initWith(codecName, dataUnits, parityUnits, options); } + /** + * Constructor with key parameters provided and no additional options. + * @param schemaName name of the schema + * @param codecName name of the erasure codec + * @param numDataUnits number of data units + * @param numParityUnits number of parity units + */ + public ECSchema(String schemaName, String codecName, + int numDataUnits, int numParityUnits) { + this(schemaName, codecName, numDataUnits, numParityUnits, null); + } + /** * Constructor with key parameters provided. Note the options may contain * additional information for the erasure codec to interpret further. 
@@ -200,4 +212,45 @@ public String toString() { return sb.toString(); } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + ECSchema ecSchema = (ECSchema) o; + // NOTE(review): NPE if schemaName/codecName/options is null -- confirm. + if (numDataUnits != ecSchema.numDataUnits) { + return false; + } + if (numParityUnits != ecSchema.numParityUnits) { + return false; + } + if (chunkSize != ecSchema.chunkSize) { + return false; + } + if (!schemaName.equals(ecSchema.schemaName)) { + return false; + } + if (!codecName.equals(ecSchema.codecName)) { + return false; + } + return options.equals(ecSchema.options); + } + + @Override + public int hashCode() { + int result = schemaName.hashCode(); + result = 31 * result + codecName.hashCode(); + result = 31 * result + options.hashCode(); + result = 31 * result + numDataUnits; + result = 31 * result + numParityUnits; + result = 31 * result + chunkSize; + // The six fields folded in above are exactly those equals() compares. + return result; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt index 74230337b7..5078a15274 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt @@ -52,4 +52,6 @@ manage EC zones (Zhe Zhang) HDFS-8023. Erasure Coding: retrieve eraure coding schema for a file from - NameNode (vinayakumarb) \ No newline at end of file + NameNode (vinayakumarb) + + HDFS-8074. Define a system-wide default EC schema. 
(Kai Zheng) \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ECSchemaManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ECSchemaManager.java new file mode 100644 index 0000000000..b001c57fd2 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ECSchemaManager.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.io.erasurecode.ECSchema; + +/** + * This manages EC schemas predefined and activated in the system. It loads from + * predefined ones in XML and syncs with persisted ones in NameNode image. + * + * This class is instantiated by the FSNamesystem. 
+ */ +@InterfaceAudience.LimitedPrivate({"HDFS"}) +public final class ECSchemaManager { + private static final int DEFAULT_DATA_BLOCKS = 6; + private static final int DEFAULT_PARITY_BLOCKS = 3; + private static final String DEFAULT_CODEC_NAME = "rs"; + private static final String DEFAULT_SCHEMA_NAME = "SYS-DEFAULT-RS-6-3"; + + private static final ECSchema SYS_DEFAULT_SCHEMA = new ECSchema( + DEFAULT_SCHEMA_NAME, DEFAULT_CODEC_NAME, DEFAULT_DATA_BLOCKS, + DEFAULT_PARITY_BLOCKS); + + /** + * Get the system-wide default EC schema, which can be used when no + * schema is specified for an EC zone. + * @return the system default schema + */ + public static ECSchema getSystemDefaultSchema() { + return SYS_DEFAULT_SCHEMA; + } + + /** + * Tell whether the specified schema is the system default one. + * @param schema the schema to check, must not be null + * @return true if it equals the system default schema, false otherwise + * @throws IllegalArgumentException if schema is null + */ + public static boolean isSystemDefault(ECSchema schema) { + if (schema == null) { + throw new IllegalArgumentException("Invalid schema parameter"); + } + // schema name is the identifier, but for safety we check all properties. + return SYS_DEFAULT_SCHEMA.equals(schema); + } +}