MAPREDUCE-5478. TeraInputFormat unnecessarily defines its own FileSplit subclass (Sandy Ryza)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1517046 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
15632cd76f
commit
9ee38f3a84
@ -181,6 +181,9 @@ Release 2.1.1-beta - UNRELEASED
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
MAPREDUCE-5478. TeraInputFormat unnecessarily defines its own FileSplit
|
||||
subclass (Sandy Ryza)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
MAPREDUCE-5446. TestJobHistoryEvents and TestJobHistoryParsing have race
|
||||
|
@ -60,48 +60,6 @@ public class TeraInputFormat extends FileInputFormat<Text,Text> {
|
||||
private static MRJobConfig lastContext = null;
|
||||
private static List<InputSplit> lastResult = null;
|
||||
|
||||
static class TeraFileSplit extends FileSplit {
|
||||
static private String[] ZERO_LOCATIONS = new String[0];
|
||||
|
||||
private String[] locations;
|
||||
|
||||
public TeraFileSplit() {
|
||||
locations = ZERO_LOCATIONS;
|
||||
}
|
||||
public TeraFileSplit(Path file, long start, long length, String[] hosts) {
|
||||
super(file, start, length, hosts);
|
||||
try {
|
||||
locations = super.getLocations();
|
||||
} catch (IOException e) {
|
||||
locations = ZERO_LOCATIONS;
|
||||
}
|
||||
}
|
||||
|
||||
// XXXXXX should this also be null-protected?
|
||||
protected void setLocations(String[] hosts) {
|
||||
locations = hosts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String[] getLocations() {
|
||||
return locations;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuffer result = new StringBuffer();
|
||||
result.append(getPath());
|
||||
result.append(" from ");
|
||||
result.append(getStart());
|
||||
result.append(" length ");
|
||||
result.append(getLength());
|
||||
for(String host: getLocations()) {
|
||||
result.append(" ");
|
||||
result.append(host);
|
||||
}
|
||||
return result.toString();
|
||||
}
|
||||
}
|
||||
|
||||
static class TextSampler implements IndexedSortable {
|
||||
private ArrayList<Text> records = new ArrayList<Text>();
|
||||
|
||||
@ -325,11 +283,6 @@ public boolean nextKeyValue() throws IOException {
|
||||
return new TeraRecordReader();
|
||||
}
|
||||
|
||||
protected FileSplit makeSplit(Path file, long start, long length,
|
||||
String[] hosts) {
|
||||
return new TeraFileSplit(file, start, length, hosts);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<InputSplit> getSplits(JobContext job) throws IOException {
|
||||
if (job == lastContext) {
|
||||
@ -343,7 +296,7 @@ public List<InputSplit> getSplits(JobContext job) throws IOException {
|
||||
System.out.println("Spent " + (t2 - t1) + "ms computing base-splits.");
|
||||
if (job.getConfiguration().getBoolean(TeraScheduler.USE, true)) {
|
||||
TeraScheduler scheduler = new TeraScheduler(
|
||||
lastResult.toArray(new TeraFileSplit[0]), job.getConfiguration());
|
||||
lastResult.toArray(new FileSplit[0]), job.getConfiguration());
|
||||
lastResult = scheduler.getNewFileSplits();
|
||||
t3 = System.currentTimeMillis();
|
||||
System.out.println("Spent " + (t3 - t2) + "ms computing TeraScheduler splits.");
|
||||
|
@ -24,7 +24,6 @@
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.examples.terasort.TeraInputFormat.TeraFileSplit;
|
||||
import org.apache.hadoop.mapreduce.InputSplit;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
|
||||
import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;
|
||||
@ -214,8 +213,9 @@ public List<InputSplit> getNewFileSplits() throws IOException {
|
||||
for(int i=0; i < splits.length; ++i) {
|
||||
if (splits[i].isAssigned) {
|
||||
// copy the split and fix up the locations
|
||||
((TeraFileSplit) realSplits[i]).setLocations
|
||||
(new String[]{splits[i].locations.get(0).hostname});
|
||||
String[] newLocations = {splits[i].locations.get(0).hostname};
|
||||
realSplits[i] = new FileSplit(realSplits[i].getPath(),
|
||||
realSplits[i].getStart(), realSplits[i].getLength(), newLocations);
|
||||
result[left++] = realSplits[i];
|
||||
} else {
|
||||
result[right--] = realSplits[i];
|
||||
|
Loading…
Reference in New Issue
Block a user