MAPREDUCE-7376. AggregateWordCount fetches wrong results. (#4257). Contributed by Ayush Saxena.

Reviewed-by: Steve Loughran <stevel@apache.org>
Ayush Saxena 2022-05-09 22:56:14 +05:30 committed by GitHub
parent a394c2b031
commit 665ada6d21
3 changed files with 94 additions and 2 deletions


@@ -200,7 +200,7 @@ public static Configuration setAggregatorDescriptors(
     conf.setInt(ValueAggregatorJobBase.DESCRIPTOR_NUM, descriptors.length);
     //specify the aggregator descriptors
     for(int i=0; i< descriptors.length; i++) {
-      conf.set(ValueAggregatorJobBase.DESCRIPTOR + i,
+      conf.set(ValueAggregatorJobBase.DESCRIPTOR + "." + i,
           "UserDefined," + descriptors[i].getName());
     }
     return conf;

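The one-character hunk above is the heart of the fix: setAggregatorDescriptors wrote each descriptor under the key DESCRIPTOR + i (no separator), while the lookup side presumably builds DESCRIPTOR + "." + i, so user-supplied descriptors were never found and the job aggregated with defaults, yielding the wrong results named in the JIRA title. A minimal standalone sketch of the mismatch; the key literal "aggregator.descriptor" is assumed purely for illustration:

import org.apache.hadoop.conf.Configuration;

public class DescriptorKeyMismatch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    String descriptor = "aggregator.descriptor"; // placeholder literal
    // Pre-fix setter: stores the spec under "aggregator.descriptor0".
    conf.set(descriptor + 0, "UserDefined,SomePlugin");
    // Reader builds "aggregator.descriptor.0" and finds nothing.
    System.out.println(conf.get(descriptor + "." + 0)); // prints "null"
  }
}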

@@ -27,6 +27,7 @@
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor;
 import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob;
+import org.apache.hadoop.util.ExitUtil;
 
 /**
  * This is an example Aggregated Hadoop Map/Reduce application. It reads the
@@ -72,7 +73,7 @@ public static void main(String[] args)
       , new Class[] {WordCountPlugInClass.class});
     job.setJarByClass(AggregateWordCount.class);
     int ret = job.waitForCompletion(true) ? 0 : 1;
-    System.exit(ret);
+    ExitUtil.terminate(ret);
   }
 }

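Swapping System.exit for ExitUtil.terminate is what makes the new test below possible: in production both paths exit the JVM with the same status, but after ExitUtil.disableSystemExit() is called, terminate throws an ExitUtil.ExitException carrying the status instead of exiting. A sketch of the pattern (the argument values are placeholders):

ExitUtil.disableSystemExit();   // terminate() now throws rather than exiting
try {
  AggregateWordCount.main(new String[] {"in", "out", "1", "textinputformat"});
} catch (ExitUtil.ExitException e) {
  assertEquals(0, e.status);    // the would-be exit code rides on the exception
}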

@@ -0,0 +1,91 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.examples;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;

import org.junit.After;
import org.junit.Test;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.HadoopTestCase;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.ExitUtil.ExitException;

import static org.junit.Assert.assertEquals;

public class TestAggregateWordCount extends HadoopTestCase {

  public TestAggregateWordCount() throws IOException {
    // Single-task local MR cluster on the local file system.
    super(LOCAL_MR, LOCAL_FS, 1, 1);
  }

  @After
  public void tearDown() throws Exception {
    FileSystem fs = getFileSystem();
    if (fs != null) {
      fs.delete(TEST_DIR, true);
    }
    super.tearDown();
  }

  // Input/Output paths for the aggregate word count job.
  private static final Path TEST_DIR = new Path(
      new File(System.getProperty("test.build.data", "/tmp"),
          "aggregatewordcount").getAbsoluteFile().toURI().toString());
  private static final Path INPUT_PATH = new Path(TEST_DIR, "inPath");
  private static final Path OUTPUT_PATH = new Path(TEST_DIR, "outPath");

  @Test
  public void testAggregateTestCount()
      throws IOException, ClassNotFoundException, InterruptedException {
    ExitUtil.disableSystemExit();
    FileSystem fs = getFileSystem();
    fs.mkdirs(INPUT_PATH);
    Path file1 = new Path(INPUT_PATH, "file1");
    Path file2 = new Path(INPUT_PATH, "file2");
    FileUtil.write(fs, file1, "Hello World");
    FileUtil.write(fs, file2, "Hello Hadoop");
    String[] args =
        new String[] {INPUT_PATH.toString(), OUTPUT_PATH.toString(), "1",
            "textinputformat"};

    // Run the AggregateWordCount job. With system exit disabled above,
    // ExitUtil.terminate in main() throws ExitException instead of
    // killing the JVM, so the exit status can be asserted.
    try {
      AggregateWordCount.main(args);
    } catch (ExitException e) {
      assertEquals(0, e.status);
    }

    String allEntries;
    try (FSDataInputStream stream =
        fs.open(new Path(OUTPUT_PATH, "part-r-00000"))) {
      allEntries = IOUtils.toString(stream, Charset.defaultCharset());
    }

    // Word counts come back tab-separated, one key per line, sorted by key.
    assertEquals("Hadoop\t1\n" + "Hello\t2\n" + "World\t1\n", allEntries);
  }
}
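
For a quick local check the new test can be run on its own; the module directory below is assumed from where these example classes live in the Hadoop tree:

cd hadoop-mapreduce-project/hadoop-mapreduce-examples
mvn test -Dtest=TestAggregateWordCount

The expected string relies on the single reducer emitting keys in sorted order (Hadoop, Hello, World), with "Hello" counted twice across the two input files.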