From 960d7643fba4e537f6f499a7cea707211528e126 Mon Sep 17 00:00:00 2001
From: Konstantin Shvachko
Date: Fri, 30 Sep 2011 19:04:56 +0000
Subject: [PATCH] MAPREDUCE-2779. JobSplitWriter.java can't handle large
 job.split file. Contributed by Ming Ma.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1177779 13f79535-47bb-0310-9956-ffa450edef68
---
 hadoop-mapreduce-project/CHANGES.txt                |  3 +++
 .../hadoop/mapreduce/split/JobSplitWriter.java      | 12 ++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 9a1508beb8..18819ace67 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -2072,6 +2072,9 @@ Release 0.22.0 - Unreleased
     MAPREDUCE-2991. queueinfo.jsp fails to show queue status for Capacity
     scheduler if queue names contain special symbols. (Priyo Mustafi via shv)
 
+    MAPREDUCE-2779. JobSplitWriter.java can't handle large job.split file.
+    (Ming Ma via shv)
+
 Release 0.21.1 - Unreleased
 
   NEW FEATURES

diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java
index 95f1fccfb7..b6e44d71c4 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplitWriter.java
@@ -114,15 +114,15 @@ SplitMetaInfo[] writeNewSplits(Configuration conf,
     if (array.length != 0) {
       SerializationFactory factory = new SerializationFactory(conf);
       int i = 0;
-      long offset = out.size();
+      long offset = out.getPos();
       for(T split: array) {
-        int prevCount = out.size();
+        long prevCount = out.getPos();
         Text.writeString(out, split.getClass().getName());
         Serializer<T> serializer =
           factory.getSerializer((Class<T>) split.getClass());
         serializer.open(out);
         serializer.serialize(split);
-        int currCount = out.size();
+        long currCount = out.getPos();
         info[i++] =
           new JobSplit.SplitMetaInfo(
               split.getLocations(), offset,
@@ -139,12 +139,12 @@ private static SplitMetaInfo[] writeOldSplits(
     SplitMetaInfo[] info = new SplitMetaInfo[splits.length];
     if (splits.length != 0) {
       int i = 0;
-      long offset = out.size();
+      long offset = out.getPos();
       for(org.apache.hadoop.mapred.InputSplit split: splits) {
-        int prevLen = out.size();
+        long prevLen = out.getPos();
         Text.writeString(out, split.getClass().getName());
         split.write(out);
-        int currLen = out.size();
+        long currLen = out.getPos();
         info[i++] = new JobSplit.SplitMetaInfo(
             split.getLocations(), offset,
             split.getLength());
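
Note (illustration, not part of the patch): FSDataOutputStream extends
java.io.DataOutputStream, whose size() method returns an int byte counter that the
JDK caps at Integer.MAX_VALUE, while getPos() reports the stream position as a
long. Once job.split grows past 2 GiB, every offset and length derived from size()
goes wrong, which is why the patch moves the split-offset bookkeeping to getPos().
The standalone sketch below demonstrates the saturation; the class name SizeVsPos,
the use of OutputStream.nullOutputStream() (Java 11+), and the 1 MiB / 2100-chunk
sizes are illustrative choices, not taken from Hadoop.

import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;

public class SizeVsPos {
  public static void main(String[] args) throws IOException {
    // Discard the bytes; only the two counters matter for the demonstration.
    OutputStream sink = OutputStream.nullOutputStream();
    DataOutputStream out = new DataOutputStream(sink);

    long actualBytes = 0;              // what a long position (getPos-style) reports
    byte[] chunk = new byte[1 << 20];  // 1 MiB per write
    for (int i = 0; i < 2100; i++) {   // ~2.05 GiB total, past 2^31 - 1
      out.write(chunk);
      actualBytes += chunk.length;
    }

    // size() has saturated at Integer.MAX_VALUE (2147483647);
    // the true byte count is 2202009600.
    System.out.println("size()      = " + out.size());
    System.out.println("true length = " + actualBytes);
  }
}

In JobSplitWriter the same mismatch surfaces not as an exception but as bogus
SplitMetaInfo offsets, because the per-split deltas are taken from a counter that
silently stops advancing at 2^31 - 1, while getPos() keeps tracking the real
position of the underlying file-system stream.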