MAPREDUCE-7376. AggregateWordCount fetches wrong results. (#4257). Contributed by Ayush Saxena.
Reviewed-by: Steve Loughran <stevel@apache.org>
commit 665ada6d21 (parent a394c2b031)
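Root cause, as the one-line fix below indicates: ValueAggregatorJob.setAggregatorDescriptors stored each descriptor spec under the key DESCRIPTOR + i (string/int concatenation, so no dot separator), while the reading side in ValueAggregatorJobBase looks the specs up under DESCRIPTOR + "." + i. The user-supplied descriptor classes were therefore never loaded, and AggregateWordCount produced wrong results. The commit fixes the setter's key, switches the example from System.exit to ExitUtil.terminate so its exit status is observable in tests, and adds a regression test.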
org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob
@@ -200,7 +200,7 @@ public static Configuration setAggregatorDescriptors(
     conf.setInt(ValueAggregatorJobBase.DESCRIPTOR_NUM, descriptors.length);
     //specify the aggregator descriptors
     for(int i=0; i< descriptors.length; i++) {
-      conf.set(ValueAggregatorJobBase.DESCRIPTOR + i,
+      conf.set(ValueAggregatorJobBase.DESCRIPTOR + "." + i,
          "UserDefined," + descriptors[i].getName());
     }
     return conf;
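A minimal standalone sketch of the key mismatch. The class DescriptorKeyMismatch is illustrative, not part of the patch, and it assumes DESCRIPTOR resolves to "mapreduce.aggregate.descriptor", as the constant's name in ValueAggregatorJobBase suggests:

import org.apache.hadoop.conf.Configuration;

// Illustrative only: why the reader never saw the setter's value.
public class DescriptorKeyMismatch {
  static final String DESCRIPTOR = "mapreduce.aggregate.descriptor";

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    int i = 0;

    // Pre-fix setter key: string + int concatenation yields
    // "mapreduce.aggregate.descriptor0" (no dot).
    conf.set(DESCRIPTOR + i, "UserDefined,SomePlugInClass");

    // Reader-side key: "mapreduce.aggregate.descriptor.0" (with dot).
    // Before the fix this lookup returns null, so the user-defined
    // aggregator is silently dropped.
    System.out.println(conf.get(DESCRIPTOR + "." + i)); // prints "null"
  }
}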
org.apache.hadoop.examples.AggregateWordCount
@@ -27,6 +27,7 @@
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor;
 import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob;
+import org.apache.hadoop.util.ExitUtil;
 
 /**
  * This is an example Aggregated Hadoop Map/Reduce application. It reads the
@@ -72,7 +73,7 @@ public static void main(String[] args)
         , new Class[] {WordCountPlugInClass.class});
     job.setJarByClass(AggregateWordCount.class);
     int ret = job.waitForCompletion(true) ? 0 : 1;
-    System.exit(ret);
+    ExitUtil.terminate(ret);
   }
 
 }
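The System.exit to ExitUtil.terminate change is what makes the new test possible: after a test calls ExitUtil.disableSystemExit(), terminate throws ExitUtil.ExitException (which carries the status) instead of killing the JVM. A small sketch using only the ExitUtil calls that also appear in the test below:

import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.ExitUtil.ExitException;

// Illustrative only: observing an exit status without exiting the JVM.
public class ExitUtilSketch {
  public static void main(String[] args) {
    ExitUtil.disableSystemExit();   // turn exits into exceptions
    try {
      ExitUtil.terminate(0);        // would be System.exit(0) otherwise
    } catch (ExitException e) {
      System.out.println("exit status: " + e.status); // prints 0
    }
  }
}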
org.apache.hadoop.examples.TestAggregateWordCount (new file)
@@ -0,0 +1,91 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.examples;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;

import org.junit.After;
import org.junit.Test;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.HadoopTestCase;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.ExitUtil.ExitException;

import static org.junit.Assert.assertEquals;

public class TestAggregateWordCount extends HadoopTestCase {
  public TestAggregateWordCount() throws IOException {
    super(LOCAL_MR, LOCAL_FS, 1, 1);
  }

  @After
  public void tearDown() throws Exception {
    FileSystem fs = getFileSystem();
    if (fs != null) {
      fs.delete(TEST_DIR, true);
    }
    super.tearDown();
  }

  // Input/output paths for the aggregate word count job.
  private static final Path TEST_DIR = new Path(
      new File(System.getProperty("test.build.data", "/tmp"),
          "aggregatewordcount").getAbsoluteFile().toURI().toString());

  private static final Path INPUT_PATH = new Path(TEST_DIR, "inPath");
  private static final Path OUTPUT_PATH = new Path(TEST_DIR, "outPath");

  @Test
  public void testAggregateTestCount()
      throws IOException, ClassNotFoundException, InterruptedException {

    // Make ExitUtil.terminate throw ExitException instead of exiting,
    // so the job's exit status can be asserted below.
    ExitUtil.disableSystemExit();
    FileSystem fs = getFileSystem();
    fs.mkdirs(INPUT_PATH);
    Path file1 = new Path(INPUT_PATH, "file1");
    Path file2 = new Path(INPUT_PATH, "file2");
    FileUtil.write(fs, file1, "Hello World");
    FileUtil.write(fs, file2, "Hello Hadoop");

    String[] args =
        new String[] {INPUT_PATH.toString(), OUTPUT_PATH.toString(), "1",
            "textinputformat"};

    // Run the AggregateWordCount job.
    try {
      AggregateWordCount.main(args);
    } catch (ExitException e) {
      assertEquals(0, e.status);
    }

    String allEntries;
    try (FSDataInputStream stream =
        fs.open(new Path(OUTPUT_PATH, "part-r-00000"))) {
      allEntries = IOUtils.toString(stream, Charset.defaultCharset());
    }

    assertEquals("Hadoop\t1\n" + "Hello\t2\n" + "World\t1\n", allEntries);
  }
}
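Taken together, the test stages two one-line input files, drives the example through AggregateWordCount.main, traps the ExitException raised by ExitUtil.terminate to assert a zero exit status, and then verifies the aggregated per-word counts in part-r-00000; without the one-line descriptor-key fix above, that final assertion fails.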