diff --git a/CHANGES.txt b/CHANGES.txt index 6d594598b3..a90b2e6b0c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -80,6 +80,9 @@ Trunk (unreleased changes) HADOOP-6472. add tokenCache option to GenericOptionsParser for passing file with secret keys to a map reduce job. (boryas) + HADOOP-6443. Serialization classes accept invalid metadata. + (Aaron Kimball via tomwhite) + OPTIMIZATIONS BUG FIXES diff --git a/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java b/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java index fea57e7063..2d74357470 100644 --- a/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java +++ b/src/java/org/apache/hadoop/io/serializer/JavaSerialization.java @@ -99,9 +99,7 @@ public class JavaSerialization extends SerializationBase { } public boolean accept(Map metadata) { - String intendedSerializer = metadata.get(SERIALIZATION_KEY); - if (intendedSerializer != null && - !getClass().getName().equals(intendedSerializer)) { + if (!checkSerializationKey(metadata)) { return false; } diff --git a/src/java/org/apache/hadoop/io/serializer/SerializationBase.java b/src/java/org/apache/hadoop/io/serializer/SerializationBase.java index 7934b089fe..eb070e34e3 100644 --- a/src/java/org/apache/hadoop/io/serializer/SerializationBase.java +++ b/src/java/org/apache/hadoop/io/serializer/SerializationBase.java @@ -101,4 +101,17 @@ public abstract class SerializationBase extends Configured * for this given metadata. */ public abstract RawComparator getRawComparator(Map metadata); + + /** + * Check that the SERIALIZATION_KEY, if set, matches the current class. + * @param metadata the serialization metadata to check. + * @return true if SERIALIZATION_KEY is unset, or if it matches the current class + * (meaning that accept() should continue processing), or false if it is a mismatch, + * meaning that accept() should return false. + */ + protected boolean checkSerializationKey(Map metadata) { + String intendedSerializer = metadata.get(SERIALIZATION_KEY); + return intendedSerializer == null || + getClass().getName().equals(intendedSerializer); + } } diff --git a/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java b/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java index 7c0dec1af1..987888a8b5 100644 --- a/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java +++ b/src/java/org/apache/hadoop/io/serializer/WritableSerialization.java @@ -135,11 +135,10 @@ public class WritableSerialization extends SerializationBase { @Override public boolean accept(Map metadata) { - String intendedSerializer = metadata.get(SERIALIZATION_KEY); - if (intendedSerializer != null && - !getClass().getName().equals(intendedSerializer)) { + if (!checkSerializationKey(metadata)) { return false; } + Class c = getClassFromMetadata(metadata); return c == null ? false : Writable.class.isAssignableFrom(c); } diff --git a/src/java/org/apache/hadoop/io/serializer/avro/AvroGenericSerialization.java b/src/java/org/apache/hadoop/io/serializer/avro/AvroGenericSerialization.java index 464b5fd88d..b02969cc5f 100644 --- a/src/java/org/apache/hadoop/io/serializer/avro/AvroGenericSerialization.java +++ b/src/java/org/apache/hadoop/io/serializer/avro/AvroGenericSerialization.java @@ -30,16 +30,18 @@ import org.apache.hadoop.io.serializer.SerializationBase; /** * Serialization for Avro Generic classes. For a class to be accepted by this - * serialization it must have metadata with key - * {@link SerializationBase#SERIALIZATION_KEY} set to {@link AvroGenericSerialization}'s - * fully-qualified classname. + * serialization it must have a schema specified. * The schema used is the one set by {@link AvroSerialization#AVRO_SCHEMA_KEY}. */ @SuppressWarnings("unchecked") public class AvroGenericSerialization extends AvroSerialization { - + @Override public boolean accept(Map metadata) { + if (!checkSerializationKey(metadata)) { + return false; + } + return metadata.get(AVRO_SCHEMA_KEY) != null; } diff --git a/src/java/org/apache/hadoop/io/serializer/avro/AvroReflectSerialization.java b/src/java/org/apache/hadoop/io/serializer/avro/AvroReflectSerialization.java index 28662f880b..c16e67b606 100644 --- a/src/java/org/apache/hadoop/io/serializer/avro/AvroReflectSerialization.java +++ b/src/java/org/apache/hadoop/io/serializer/avro/AvroReflectSerialization.java @@ -54,8 +54,8 @@ public class AvroReflectSerialization extends AvroSerialization{ if (packages == null) { getPackages(); } - if (getClass().getName().equals(metadata.get(SERIALIZATION_KEY))) { - return true; + if (!checkSerializationKey(metadata)) { + return false; } Class c = getClassFromMetadata(metadata); if (c == null) { diff --git a/src/java/org/apache/hadoop/io/serializer/avro/AvroSerialization.java b/src/java/org/apache/hadoop/io/serializer/avro/AvroSerialization.java index d1a649591f..c4fde845f2 100644 --- a/src/java/org/apache/hadoop/io/serializer/avro/AvroSerialization.java +++ b/src/java/org/apache/hadoop/io/serializer/avro/AvroSerialization.java @@ -141,8 +141,7 @@ public abstract class AvroSerialization extends SerializationBase { * @return a RawComparator parameterized for the specified Avro schema. */ public RawComparator getRawComparator(Map metadata) { - Schema schema = Schema.parse(metadata.get(AVRO_SCHEMA_KEY)); + Schema schema = getSchema(metadata); return new AvroComparator(schema); } - } diff --git a/src/java/org/apache/hadoop/io/serializer/avro/AvroSpecificSerialization.java b/src/java/org/apache/hadoop/io/serializer/avro/AvroSpecificSerialization.java index e60ee89b93..6101ab34a2 100644 --- a/src/java/org/apache/hadoop/io/serializer/avro/AvroSpecificSerialization.java +++ b/src/java/org/apache/hadoop/io/serializer/avro/AvroSpecificSerialization.java @@ -39,8 +39,8 @@ public class AvroSpecificSerialization @Override public boolean accept(Map metadata) { - if (getClass().getName().equals(metadata.get(SERIALIZATION_KEY))) { - return true; + if (!checkSerializationKey(metadata)) { + return false; } Class c = getClassFromMetadata(metadata); return c == null ? false : SpecificRecord.class.isAssignableFrom(c); diff --git a/src/test/core/org/apache/hadoop/io/serializer/TestWritableSerialization.java b/src/test/core/org/apache/hadoop/io/serializer/TestWritableSerialization.java index eb98a76108..562bc90290 100644 --- a/src/test/core/org/apache/hadoop/io/serializer/TestWritableSerialization.java +++ b/src/test/core/org/apache/hadoop/io/serializer/TestWritableSerialization.java @@ -23,15 +23,20 @@ import static org.apache.hadoop.io.TestGenericWritable.CONF_TEST_VALUE; import junit.framework.TestCase; import java.io.IOException; +import java.util.HashMap; import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.TestGenericWritable.Foo; import org.apache.hadoop.io.TestGenericWritable.Bar; import org.apache.hadoop.io.TestGenericWritable.Baz; import org.apache.hadoop.io.TestGenericWritable.FooGenericWritable; +import org.apache.hadoop.io.serializer.DeserializerBase; +import org.apache.hadoop.io.serializer.SerializationBase; +import org.apache.hadoop.io.serializer.SerializerBase; import org.apache.hadoop.util.GenericsUtil; public class TestWritableSerialization extends TestCase { @@ -61,6 +66,26 @@ public class TestWritableSerialization extends TestCase { assertNotNull(result.getConf()); } + @SuppressWarnings("unchecked") + public void testIgnoreMisconfiguredMetadata() throws IOException { + // If SERIALIZATION_KEY is set, still need class name. + + Configuration conf = new Configuration(); + Map metadata = new HashMap(); + metadata.put(SerializationBase.SERIALIZATION_KEY, + WritableSerialization.class.getName()); + SerializationFactory factory = new SerializationFactory(conf); + SerializationBase serialization = factory.getSerialization(metadata); + assertNull("Got serializer without any class info", serialization); + + metadata.put(SerializationBase.CLASS_KEY, + Text.class.getName()); + serialization = factory.getSerialization(metadata); + assertNotNull("Didn't get serialization!", serialization); + assertTrue("Wrong serialization class", + serialization instanceof WritableSerialization); + } + @SuppressWarnings("unchecked") public void testReuseSerializer() throws IOException { // Test that we can write multiple objects of the same type @@ -112,4 +137,46 @@ public class TestWritableSerialization extends TestCase { barSerializer.close(); out.reset(); } + + + // Test the SerializationBase.checkSerializationKey() method. + class DummySerializationBase extends SerializationBase { + public boolean accept(Map metadata) { + return checkSerializationKey(metadata); + } + + public SerializerBase getSerializer(Map metadata) { + return null; + } + + public DeserializerBase getDeserializer(Map metadata) { + return null; + } + + public RawComparator getRawComparator(Map metadata) { + return null; + } + } + + public void testSerializationKeyCheck() { + DummySerializationBase dummy = new DummySerializationBase(); + Map metadata = new HashMap(); + + assertTrue("Didn't accept empty metadata", dummy.accept(metadata)); + + metadata.put(SerializationBase.SERIALIZATION_KEY, + DummySerializationBase.class.getName()); + assertTrue("Didn't accept valid metadata", dummy.accept(metadata)); + + metadata.put(SerializationBase.SERIALIZATION_KEY, "foo"); + assertFalse("Accepted invalid metadata", dummy.accept(metadata)); + + try { + dummy.accept((Map) null); + // Shouldn't get here! + fail("Somehow didn't actually test the method we expected"); + } catch (NullPointerException npe) { + // expected this. + } + } } diff --git a/src/test/core/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java b/src/test/core/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java index c3e4a26014..4dd56997ef 100644 --- a/src/test/core/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java +++ b/src/test/core/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java @@ -26,12 +26,38 @@ import junit.framework.TestCase; import org.apache.avro.util.Utf8; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.serializer.SerializationBase; +import org.apache.hadoop.io.serializer.SerializationFactory; import org.apache.hadoop.io.serializer.SerializationTestUtil; public class TestAvroSerialization extends TestCase { private static final Configuration conf = new Configuration(); + @SuppressWarnings("unchecked") + public void testIgnoreMisconfiguredMetadata() { + // If SERIALIZATION_KEY is set, still need class name. + + Configuration conf = new Configuration(); + Map metadata = new HashMap(); + SerializationFactory factory = new SerializationFactory(conf); + SerializationBase serialization = null; + + metadata.put(SerializationBase.SERIALIZATION_KEY, + AvroGenericSerialization.class.getName()); + serialization = factory.getSerialization(metadata); + assertNull("Got serializer without any class info", serialization); + + metadata.put(SerializationBase.SERIALIZATION_KEY, + AvroReflectSerialization.class.getName()); + serialization = factory.getSerialization(metadata); + assertNull("Got serializer without any class info", serialization); + + metadata.put(SerializationBase.SERIALIZATION_KEY, + AvroSpecificSerialization.class.getName()); + serialization = factory.getSerialization(metadata); + assertNull("Got serializer without any class info", serialization); + } + public void testSpecific() throws Exception { AvroRecord before = new AvroRecord(); before.intField = 5;