From 9a743bd17f55245841db3de5d1c785282306d408 Mon Sep 17 00:00:00 2001
From: Dominik Diedrich <46646548+dom93dd@users.noreply.github.com>
Date: Mon, 11 Nov 2024 16:46:36 +0100
Subject: [PATCH] HADOOP-19315. Upgrade Apache Avro to 1.11.4 (#7128)

* All field access is now via setter/getter methods
* To use Avro to marshal Serializable objects, the packages they are in
  must be declared in the system property "org.apache.avro.SERIALIZABLE_PACKAGES"

This is required to address
- CVE-2024-47561
- CVE-2023-39410

This change is not backwards compatible.

Contributed by Dominik Diedrich
---
 LICENSE-binary                                |  2 +-
 .../hadoop/constants/ConfigConstants.java     | 35 +++++++++++++++++++
 .../apache/hadoop/constants/package-info.java | 22 ++++++++++++
 .../java/org/apache/hadoop/fs/TestPath.java   |  4 +++
 .../java/org/apache/hadoop/io/TestText.java   |  4 +++
 .../avro/TestAvroSerialization.java           |  2 +-
 .../jobhistory/JobQueueChangeEvent.java       | 12 +++----
 hadoop-project/pom.xml                        |  2 +-
 .../apache/hadoop/tools/rumen/JobBuilder.java | 16 ++++----
 .../hadoop/tools/rumen/JobHistoryUtils.java   |  6 ++--
 .../apache/hadoop/tools/rumen/LoggedTask.java |  8 ++---
 .../hadoop/tools/rumen/LoggedTaskAttempt.java |  8 ++---
 12 files changed, 93 insertions(+), 28 deletions(-)
 create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/constants/ConfigConstants.java
 create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/constants/package-info.java

diff --git a/LICENSE-binary b/LICENSE-binary
index 8e1fb1856d..1999e9494b 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -297,7 +297,7 @@ io.swagger:swagger-annotations:1.5.4
 javax.inject:javax.inject:1
 net.java.dev.jna:jna:5.2.0
 net.minidev:accessors-smart:1.2
-org.apache.avro:avro:1.9.2
+org.apache.avro:avro:1.11.4
 org.apache.avro:avro:1.11.3
 org.apache.commons:commons-compress:1.26.1
 org.apache.commons:commons-configuration2:2.10.1
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/constants/ConfigConstants.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/constants/ConfigConstants.java
new file mode 100644
index 0000000000..0a6c5a7f35
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/constants/ConfigConstants.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.constants;
+
+/**
+ * Evolving config constants class used in various hadoop tests.
+ */
+public final class ConfigConstants {
+
+  private ConfigConstants() {}
+
+  /**
+   * System property name for the avro dependency.
+   * This property is used to configure trusted packages,
+   * which the avro dependency can use for serialization.
+   */
+  public static final String CONFIG_AVRO_SERIALIZABLE_PACKAGES =
+      "org.apache.avro.SERIALIZABLE_PACKAGES";
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/constants/package-info.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/constants/package-info.java
new file mode 100644
index 0000000000..19ad181e80
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/constants/package-info.java
@@ -0,0 +1,22 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Evolving config constants class used in various hadoop tests.
+ */
+package org.apache.hadoop.constants;
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestPath.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestPath.java
index e775d1d069..4204faaada 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestPath.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestPath.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.fs;
 
+
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -30,6 +31,7 @@
 import java.util.Arrays;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.constants.ConfigConstants;
 import org.apache.hadoop.io.AvroTestUtil;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.Shell;
@@ -404,6 +406,8 @@ public void testGetName() {
 
   @Test (timeout = 30000)
   public void testAvroReflect() throws Exception {
+    // Avro requires that packages used for (de-)serialization are explicitly declared as trusted
+    System.setProperty(ConfigConstants.CONFIG_AVRO_SERIALIZABLE_PACKAGES, "org.apache.hadoop.fs");
     AvroTestUtil.testReflect
       (new Path("foo"),
        "{\"type\":\"string\",\"java-class\":\"org.apache.hadoop.fs.Path\"}");
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java
index 4eb260c033..24bb1edb7a 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java
@@ -24,6 +24,8 @@
 import java.nio.charset.CharacterCodingException;
 import java.nio.charset.StandardCharsets;
 import java.util.Random;
+
+import org.apache.hadoop.constants.ConfigConstants;
 import org.apache.hadoop.thirdparty.com.google.common.primitives.Bytes;
 import org.junit.Test;
 
@@ -344,6 +346,8 @@ public void testConcurrentEncodeDecode() throws Exception{
 
   @Test
   public void testAvroReflect() throws Exception {
+    // Avro requires that packages used for (de-)serialization are explicitly declared as trusted
+    System.setProperty(ConfigConstants.CONFIG_AVRO_SERIALIZABLE_PACKAGES, "org.apache.hadoop.io");
     AvroTestUtil.testReflect
       (new Text("foo"),
        "{\"type\":\"string\",\"java-class\":\"org.apache.hadoop.io.Text\"}");
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java
index b2d2a8d100..97e281ba85 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java
@@ -33,7 +33,7 @@ public class TestAvroSerialization {
   @Test
   public void testSpecific() throws Exception {
     AvroRecord before = new AvroRecord();
-    before.intField = 5;
+    before.setIntField(5);
     AvroRecord after = SerializationTestUtil.testSerialization(conf, before);
     assertEquals(before, after);
   }
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobQueueChangeEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobQueueChangeEvent.java
index 66f3781239..9a5ff77725 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobQueueChangeEvent.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobQueueChangeEvent.java
@@ -20,7 +20,6 @@
 
 import java.util.Set;
 
-import org.apache.avro.util.Utf8;
 import org.apache.hadoop.mapreduce.JobID;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEvent;
@@ -31,8 +30,8 @@ public class JobQueueChangeEvent implements HistoryEvent {
   private JobQueueChange datum = new JobQueueChange();
 
   public JobQueueChangeEvent(JobID id, String queueName) {
-    datum.jobid = new Utf8(id.toString());
-    datum.jobQueueName = new Utf8(queueName);
+    datum.setJobid(id.toString());
+    datum.setJobQueueName(queueName);
   }
 
   JobQueueChangeEvent() { }
@@ -54,13 +53,14 @@ public void setDatum(Object datum) {
 
   /** Get the Job ID */
   public JobID getJobId() {
-    return JobID.forName(datum.jobid.toString());
+    return JobID.forName(datum.getJobid().toString());
   }
 
   /** Get the new Job queue name */
   public String getJobQueueName() {
-    if (datum.jobQueueName != null) {
-      return datum.jobQueueName.toString();
+    java.lang.CharSequence jobQueueName = datum.getJobQueueName();
+    if (jobQueueName != null) {
+      return jobQueueName.toString();
     }
     return null;
   }
diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml
index 0c76b3ad4b..fedd3f633a 100644
--- a/hadoop-project/pom.xml
+++ b/hadoop-project/pom.xml
@@ -63,7 +63,7 @@
     <java.security.egd>file:///dev/urandom</java.security.egd>
 
-    <avro.version>1.9.2</avro.version>
+    <avro.version>1.11.4</avro.version>
 
     <jersey.version>1.19.4</jersey.version>
diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java
index 1213e6a46f..603b248f6e 100644
--- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java
+++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobBuilder.java
@@ -460,7 +460,7 @@ private void processTaskFinishedEvent(TaskFinishedEvent event) {
     }
     task.setFinishTime(event.getFinishTime());
     task.setTaskStatus(getPre21Value(event.getTaskStatus()));
-    task.incorporateCounters(((TaskFinished) event.getDatum()).counters);
+    task.incorporateCounters(((TaskFinished) event.getDatum()).getCounters());
   }
 
   private void processTaskFailedEvent(TaskFailedEvent event) {
@@ -472,7 +472,7 @@ private void processTaskFailedEvent(TaskFailedEvent event) {
     task.setFinishTime(event.getFinishTime());
     task.setTaskStatus(getPre21Value(event.getTaskStatus()));
     TaskFailed t = (TaskFailed)(event.getDatum());
-    task.putDiagnosticInfo(t.error.toString());
+    task.putDiagnosticInfo(t.getError().toString());
     // killed task wouldn't have failed attempt.
     if (t.getFailedDueToAttempt() != null) {
       task.putFailedDueToAttemptId(t.getFailedDueToAttempt().toString());
@@ -542,7 +542,7 @@ private void processTaskAttemptFinishedEvent(TaskAttemptFinishedEvent event) {
     }
     attempt.setFinishTime(event.getFinishTime());
     attempt
-        .incorporateCounters(((TaskAttemptFinished) event.getDatum()).counters);
+        .incorporateCounters(((TaskAttemptFinished) event.getDatum()).getCounters());
   }
 
   private void processReduceAttemptFinishedEvent(
@@ -568,7 +568,7 @@ private void processReduceAttemptFinishedEvent(
     attempt.setShuffleFinished(event.getShuffleFinishTime());
     attempt.setSortFinished(event.getSortFinishTime());
     attempt
-        .incorporateCounters(((ReduceAttemptFinished) event.getDatum()).counters);
+        .incorporateCounters(((ReduceAttemptFinished) event.getDatum()).getCounters());
     attempt.arraySetClockSplits(event.getClockSplits());
     attempt.arraySetCpuUsages(event.getCpuUsages());
     attempt.arraySetVMemKbytes(event.getVMemKbytes());
@@ -596,7 +596,7 @@ private void processMapAttemptFinishedEvent(MapAttemptFinishedEvent event) {
     // is redundant, but making this will add future-proofing.
     attempt.setFinishTime(event.getFinishTime());
     attempt
-        .incorporateCounters(((MapAttemptFinished) event.getDatum()).counters);
+        .incorporateCounters(((MapAttemptFinished) event.getDatum()).getCounters());
     attempt.arraySetClockSplits(event.getClockSplits());
     attempt.arraySetCpuUsages(event.getCpuUsages());
     attempt.arraySetVMemKbytes(event.getVMemKbytes());
@@ -661,11 +661,11 @@ private void processJobFinishedEvent(JobFinishedEvent event) {
 
     JobFinished job = (JobFinished)event.getDatum();
     Map<String, Long> countersMap =
-        JobHistoryUtils.extractCounters(job.totalCounters);
+        JobHistoryUtils.extractCounters(job.getTotalCounters());
     result.putTotalCounters(countersMap);
-    countersMap = JobHistoryUtils.extractCounters(job.mapCounters);
+    countersMap = JobHistoryUtils.extractCounters(job.getMapCounters());
     result.putMapCounters(countersMap);
-    countersMap = JobHistoryUtils.extractCounters(job.reduceCounters);
+    countersMap = JobHistoryUtils.extractCounters(job.getReduceCounters());
     result.putReduceCounters(countersMap);
   }
 
diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobHistoryUtils.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobHistoryUtils.java
index 6ae87bbd40..34ef95f337 100644
--- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobHistoryUtils.java
+++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobHistoryUtils.java
@@ -157,9 +157,9 @@ static boolean isJobConfXml(String fileName) {
   static Map<String, Long> extractCounters(JhCounters counters) {
     Map<String, Long> countersMap = new HashMap<String, Long>();
     if (counters != null) {
-      for (JhCounterGroup group : counters.groups) {
-        for (JhCounter counter : group.counts) {
-          countersMap.put(counter.name.toString(), counter.value);
+      for (JhCounterGroup group : counters.getGroups()) {
+        for (JhCounter counter : group.getCounts()) {
+          countersMap.put(counter.getName().toString(), counter.getValue());
         }
       }
     }
diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/LoggedTask.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/LoggedTask.java
index 4ae33a7661..2308e58690 100644
--- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/LoggedTask.java
+++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/LoggedTask.java
@@ -268,11 +268,11 @@ private static void incorporateCounter(SetField thunk, JhCounters counters,
       String counterName) {
     counterName = canonicalizeCounterName(counterName);
 
-    for (JhCounterGroup group : counters.groups) {
-      for (JhCounter counter : group.counts) {
+    for (JhCounterGroup group : counters.getGroups()) {
+      for (JhCounter counter : group.getCounts()) {
         if (counterName
-            .equals(canonicalizeCounterName(counter.name.toString()))) {
-          thunk.set(counter.value);
+            .equals(canonicalizeCounterName(counter.getName().toString()))) {
+          thunk.set(counter.getValue());
           return;
         }
       }
diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/LoggedTaskAttempt.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/LoggedTaskAttempt.java
index 5c6abd372c..c4ca962b6e 100644
--- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/LoggedTaskAttempt.java
+++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/LoggedTaskAttempt.java
@@ -636,11 +636,11 @@ private static void incorporateCounter(SetField thunk, JhCounters counters,
       String counterName) {
     counterName = canonicalizeCounterName(counterName);
 
-    for (JhCounterGroup group : counters.groups) {
-      for (JhCounter counter : group.counts) {
+    for (JhCounterGroup group : counters.getGroups()) {
+      for (JhCounter counter : group.getCounts()) {
         if (counterName
-            .equals(canonicalizeCounterName(counter.name.toString()))) {
-          thunk.set(counter.value);
+            .equals(canonicalizeCounterName(counter.getName().toString()))) {
+          thunk.set(counter.getValue());
           return;
         }
       }
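
For illustration, a minimal, self-contained sketch (not part of the patch) of what the trusted-packages requirement from the commit message means for code that round-trips a Hadoop class through Avro reflect (de-)serialization. It mirrors the TestPath change above; the class name AvroTrustedPackagesSketch and the exact round-trip code are assumptions made for this example, not code from the change.

import java.io.ByteArrayOutputStream;

import org.apache.avro.Schema;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumReader;
import org.apache.avro.reflect.ReflectDatumWriter;
import org.apache.hadoop.fs.Path;

public class AvroTrustedPackagesSketch {
  public static void main(String[] args) throws Exception {
    // Per the commit message: to marshal Serializable objects with Avro 1.11.4,
    // their packages must be declared in org.apache.avro.SERIALIZABLE_PACKAGES.
    // Declare the trusted package before Avro reflects over the class,
    // exactly as the TestPath/TestText changes above do for the tests.
    System.setProperty("org.apache.avro.SERIALIZABLE_PACKAGES", "org.apache.hadoop.fs");

    Path before = new Path("foo");
    // Expected to yield {"type":"string","java-class":"org.apache.hadoop.fs.Path"},
    // the schema TestPath asserts against.
    Schema schema = ReflectData.get().getSchema(Path.class);

    // Write the value with Avro's reflect writer ...
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new ReflectDatumWriter<Path>(schema).write(before, encoder);
    encoder.flush();

    // ... and read it back; without the system property this round trip
    // is expected to be rejected by Avro 1.11.4.
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
    Path after = new ReflectDatumReader<Path>(schema).read(null, decoder);
    System.out.println(schema + " -> " + after);
  }
}

Production code would typically set the property once at startup, or pass it on the JVM command line via -Dorg.apache.avro.SERIALIZABLE_PACKAGES=..., rather than at each call site as the tests do.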