MAPREDUCE-3458. Fix findbugs warnings in hadoop-examples. (Devaraj K via mahadev)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1210190 13f79535-47bb-0310-9956-ffa450edef68
Mahadev Konar 2011-12-04 19:58:32 +00:00
parent bb2b8f565c
commit 1972a76e5a
8 changed files with 128 additions and 12 deletions

View File

@@ -233,6 +233,9 @@ Release 0.23.1 - Unreleased
MAPREDUCE-3456. $HADOOP_PREFIX/bin/yarn should set defaults for
$HADOOP_*_HOME (Eric Payne via mahadev)
MAPREDUCE-3458. Fix findbugs warnings in hadoop-examples. (Devaraj K
via mahadev)
Release 0.23.0 - 2011-11-01
INCOMPATIBLE CHANGES

View File

@@ -0,0 +1,63 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<FindBugsFilter>
<!-- Ignore some irrelevant serialization warnings -->
<Match>
<Class name="org.apache.hadoop.examples.SecondarySort$FirstGroupingComparator" />
<Bug pattern="SE_COMPARATOR_SHOULD_BE_SERIALIZABLE" />
</Match>
<Match>
<Class name="org.apache.hadoop.examples.SecondarySort$IntPair$Comparator" />
<Bug pattern="SE_COMPARATOR_SHOULD_BE_SERIALIZABLE" />
</Match>
<!-- Ignore the irrelevant resource cleanup warnings-->
<Match>
<Class name="org.apache.hadoop.examples.DBCountPageView" />
<Method name="verify" />
<Bug pattern="OBL_UNSATISFIED_OBLIGATION" />
</Match>
<!-- Ignore the irrelevant closure warnings-->
<Match>
<Class name="org.apache.hadoop.examples.dancing.Pentomino$Piece" />
<Bug pattern="EI_EXPOSE_REP2" />
</Match>
<!-- Ignore the irrelevant package protection warnings-->
<Match>
<Class name="org.apache.hadoop.examples.dancing.Pentomino" />
<Or>
<Field name="fourRotations" />
<Field name="oneRotation" />
<Field name="twoRotations" />
</Or>
<Bug pattern="MS_PKGPROTECT" />
</Match>
<!-- Ignore the irrelevant right shift warnings, as only positive integers are given as input-->
<Match>
<Class name="org.apache.hadoop.examples.terasort.Unsigned16" />
<Method name="getHexDigit" />
<Bug pattern="ICAST_QUESTIONABLE_UNSIGNED_RIGHT_SHIFT" />
</Match>
</FindBugsFilter>
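For context: the first two Match entries above suppress SE_COMPARATOR_SHOULD_BE_SERIALIZABLE. FindBugs raises that warning for any Comparator that does not also implement Serializable, since comparators often end up stored inside serializable collections such as TreeMap; the SecondarySort comparators are only instantiated by the MapReduce framework at job runtime, so the warning is noise here. A minimal sketch of the kind of class that trips the detector (hypothetical names, not the SecondarySort code):

import java.util.Comparator;

// Hypothetical example: implements Comparator but not Serializable, so FindBugs
// reports SE_COMPARATOR_SHOULD_BE_SERIALIZABLE when it analyzes this class.
class FirstFieldComparator implements Comparator<int[]> {
  @Override
  public int compare(int[] left, int[] right) {
    // order purely by the first field, mirroring a "grouping" comparator
    return left[0] < right[0] ? -1 : (left[0] == right[0] ? 0 : 1);
  }
}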

View File

@@ -27,6 +27,10 @@
<name>Apache Hadoop MapReduce Examples</name>
<packaging>jar</packaging>
<properties>
<mr.examples.basedir>${basedir}</mr.examples.basedir>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
@@ -58,6 +62,18 @@
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
<configuration>
<findbugsXmlOutput>true</findbugsXmlOutput>
<xmlOutput>true</xmlOutput>
<excludeFilterFile>${mr.examples.basedir}/dev-support/findbugs-exclude.xml</excludeFilterFile>
<effort>Max</effort>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -19,7 +19,9 @@
package org.apache.hadoop.examples;
import java.io.IOException;
import java.util.*;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
@@ -29,9 +31,14 @@
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.join.*;
import org.apache.hadoop.mapreduce.lib.join.CompositeInputFormat;
import org.apache.hadoop.mapreduce.lib.join.TupleWritable;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
@@ -52,7 +59,7 @@
* [<i>in-dir</i>]* <i>in-dir</i> <i>out-dir</i>
*/
public class Join extends Configured implements Tool {
public static String REDUCES_PER_HOST = "mapreduce.join.reduces_per_host";
public static final String REDUCES_PER_HOST = "mapreduce.join.reduces_per_host";
static int printUsage() {
System.out.println("join [-r <reduces>] " +
"[-inFormat <input format class>] " +

View File

@@ -69,7 +69,7 @@ public String getName() {
}
public int[] getRotations() {
return rotations;
return rotations.clone();
}
public boolean getFlippable() {
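The getRotations() change above is the standard defensive-copy fix for FindBugs' expose-internal-representation warnings: return a clone of the internal array so callers cannot mutate the piece's state (the constructor-side EI_EXPOSE_REP2 warning on Pentomino$Piece is suppressed in the exclude file instead). A minimal sketch of the pattern, with hypothetical names:

// Hypothetical class illustrating the defensive-copy idiom used above.
class Shape {
  private final int[] rotations;

  Shape(int[] rotations) {
    this.rotations = rotations.clone();   // copy in: caller keeps no handle on our state
  }

  int[] getRotations() {
    return rotations.clone();             // copy out: caller may modify the copy freely
  }
}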

View File

@@ -70,7 +70,7 @@ public class TeraGen extends Configured implements Tool {
public static enum Counters {CHECKSUM}
public static String NUM_ROWS = "mapreduce.terasort.num-rows";
public static final String NUM_ROWS = "mapreduce.terasort.num-rows";
/**
* An input format that assigns ranges of longs to each mapper.
*/

View File

@@ -156,10 +156,10 @@ Text[] createPartitions(int numPartitions) {
* them and picks N-1 keys to generate N equally sized partitions.
* @param job the job to sample
* @param partFile where to write the output file to
* @throws IOException if something goes wrong
* @throws Throwable if something goes wrong
*/
public static void writePartitionFile(final JobContext job,
Path partFile) throws IOException, InterruptedException {
Path partFile) throws Throwable {
long t1 = System.currentTimeMillis();
Configuration conf = job.getConfiguration();
final TeraInputFormat inFormat = new TeraInputFormat();
@@ -174,11 +174,12 @@ public static void writePartitionFile(final JobContext job,
final long recordsPerSample = sampleSize / samples;
final int sampleStep = splits.size() / samples;
Thread[] samplerReader = new Thread[samples];
SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
// take N samples from different parts of the input
for(int i=0; i < samples; ++i) {
final int idx = i;
samplerReader[i] =
new Thread ("Sampler Reader " + idx) {
new Thread (threadGroup,"Sampler Reader " + idx) {
{
setDaemon(true);
}
@@ -201,7 +202,7 @@ public void run() {
} catch (IOException ie){
System.err.println("Got an exception while reading splits " +
StringUtils.stringifyException(ie));
System.exit(-1);
throw new RuntimeException(ie);
} catch (InterruptedException e) {
}
@@ -215,6 +216,9 @@ public void run() {
for (int i = 0; i < samples; i++) {
try {
samplerReader[i].join();
if(threadGroup.getThrowable() != null){
throw threadGroup.getThrowable();
}
} catch (InterruptedException e) {
}
}
@@ -225,6 +229,25 @@ public void run() {
long t3 = System.currentTimeMillis();
System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}
static class SamplerThreadGroup extends ThreadGroup{
private Throwable throwable;
public SamplerThreadGroup(String s) {
super(s);
}
@Override
public void uncaughtException(Thread thread, Throwable throwable) {
this.throwable = throwable;
}
public Throwable getThrowable() {
return this.throwable;
}
}
static class TeraRecordReader extends RecordReader<Text,Text> {
private FSDataInputStream in;
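The sampler changes above replace the System.exit(-1) inside the reader threads with a thrown RuntimeException and route the failure back to the caller: each reader now runs in a SamplerThreadGroup whose uncaughtException() records the throwable, and the loop that joins the readers rethrows it, which is why writePartitionFile now declares throws Throwable (TeraSort.run(), in the next file, catches that Throwable and returns -1). A standalone sketch of the propagation pattern under those assumptions, with hypothetical names and a simulated failure:

public class ThreadGroupPropagationDemo {
  // Thread group that remembers the first uncaught throwable from any member thread.
  static class CapturingGroup extends ThreadGroup {
    private volatile Throwable failure;
    CapturingGroup(String name) { super(name); }
    @Override
    public void uncaughtException(Thread t, Throwable e) { failure = e; }
    Throwable failure() { return failure; }
  }

  public static void main(String[] args) throws Throwable {
    CapturingGroup group = new CapturingGroup("sampler-readers");
    Thread[] readers = new Thread[4];
    for (int i = 0; i < readers.length; i++) {
      final int idx = i;
      readers[i] = new Thread(group, "reader-" + idx) {
        @Override
        public void run() {
          if (idx == 2) {                            // simulate one reader failing
            throw new RuntimeException("read failed on split " + idx);
          }
        }
      };
      readers[i].setDaemon(true);
      readers[i].start();
    }
    for (Thread reader : readers) {
      reader.join();
      if (group.failure() != null) {
        throw group.failure();                       // surface the worker's failure
      }
    }
  }
}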

View File

@@ -31,7 +31,6 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.MRJobConfig;
@@ -300,7 +299,12 @@ public int run(String[] args) throws Exception {
TeraInputFormat.PARTITION_FILENAME);
URI partitionUri = new URI(partitionFile.toString() +
"#" + TeraInputFormat.PARTITION_FILENAME);
TeraInputFormat.writePartitionFile(job, partitionFile);
try {
TeraInputFormat.writePartitionFile(job, partitionFile);
} catch (Throwable e) {
LOG.error(e.getMessage());
return -1;
}
job.addCacheFile(partitionUri);
job.createSymlink();
long end = System.currentTimeMillis();