a196766ea0
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1134994 13f79535-47bb-0310-9956-ffa450edef68
31241 lines
1.3 MiB
31241 lines
1.3 MiB
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
|
|
<!-- Generated by the JDiff Javadoc doclet -->
|
|
<!-- (http://www.jdiff.org) -->
|
|
<!-- on Tue Aug 24 11:40:41 PDT 2010 -->
|
|
|
|
<api
|
|
xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'
|
|
xsi:noNamespaceSchemaLocation='api.xsd'
|
|
name="hadoop-mapred 0.21.0"
|
|
jdversion="1.0.9">
|
|
|
|
<!-- Command line arguments = -doclet org.apache.hadoop.classification.tools.ExcludePrivateAnnotationsJDiffDoclet -docletpath /Users/tom/workspace/hadoop-mapreduce-0.21-committer/build/ivy/lib/Hadoop/common/hadoop-common-0.21.0.jar:/Users/tom/workspace/hadoop-mapreduce-0.21-committer/build/ivy/lib/Hadoop/jdiff/jdiff-1.0.9.jar:/Users/tom/workspace/hadoop-mapreduce-0.21-committer/build/ivy/lib/Hadoop/jdiff/xerces-1.4.4.jar -classpath /Users/tom/workspace/hadoop-mapreduce-0.21-committer/build/classes:/Users/tom/workspace/hadoop-mapreduce-0.21-committer/conf:/Users/tom/.ivy2/cache/org.apache.hadoop/hadoop-common/jars/hadoop-common-0.21.0-SNAPSHOT.jar:/Users/tom/.ivy2/cache/commons-cli/commons-cli/jars/commons-cli-1.2.jar:/Users/tom/.ivy2/cache/xmlenc/xmlenc/jars/xmlenc-0.52.jar:/Users/tom/.ivy2/cache/commons-httpclient/commons-httpclient/jars/commons-httpclient-3.1.jar:/Users/tom/.ivy2/cache/commons-codec/commons-codec/jars/commons-codec-1.4.jar:/Users/tom/.ivy2/cache/commons-net/commons-net/jars/commons-net-1.4.1.jar:/Users/tom/.ivy2/cache/oro/oro/jars/oro-2.0.8.jar:/Users/tom/.ivy2/cache/org.mortbay.jetty/jetty/jars/jetty-6.1.14.jar:/Users/tom/.ivy2/cache/org.mortbay.jetty/jetty-util/jars/jetty-util-6.1.14.jar:/Users/tom/.ivy2/cache/org.mortbay.jetty/servlet-api-2.5/jars/servlet-api-2.5-6.1.14.jar:/Users/tom/.ivy2/cache/tomcat/jasper-runtime/jars/jasper-runtime-5.5.12.jar:/Users/tom/.ivy2/cache/tomcat/jasper-compiler/jars/jasper-compiler-5.5.12.jar:/Users/tom/.ivy2/cache/org.mortbay.jetty/jsp-api-2.1/jars/jsp-api-2.1-6.1.14.jar:/Users/tom/.ivy2/cache/org.mortbay.jetty/jsp-2.1/jars/jsp-2.1-6.1.14.jar:/Users/tom/.ivy2/cache/org.eclipse.jdt/core/jars/core-3.1.1.jar:/Users/tom/.ivy2/cache/ant/ant/jars/ant-1.6.5.jar:/Users/tom/.ivy2/cache/commons-el/commons-el/jars/commons-el-1.0.jar:/Users/tom/.ivy2/cache/net.java.dev.jets3t/jets3t/jars/jets3t-0.7.1.jar:/Users/tom/.ivy2/cache/commons-logging/commons-logging/jars/commons-logging-1.1.1.jar:/Users/tom/.ivy2/cache/net.sf.kosm
osfs/kfs/jars/kfs-0.3.jar:/Users/tom/.ivy2/cache/junit/junit/jars/junit-4.8.1.jar:/Users/tom/.ivy2/cache/hsqldb/hsqldb/jars/hsqldb-1.8.0.10.jar:/Users/tom/.ivy2/cache/org.apache.hadoop/avro/jars/avro-1.3.2.jar:/Users/tom/.ivy2/cache/org.codehaus.jackson/jackson-mapper-asl/jars/jackson-mapper-asl-1.4.2.jar:/Users/tom/.ivy2/cache/org.codehaus.jackson/jackson-core-asl/jars/jackson-core-asl-1.4.2.jar:/Users/tom/.ivy2/cache/org.slf4j/slf4j-api/jars/slf4j-api-1.5.11.jar:/Users/tom/.ivy2/cache/com.thoughtworks.paranamer/paranamer/jars/paranamer-2.2.jar:/Users/tom/.ivy2/cache/com.thoughtworks.paranamer/paranamer-ant/jars/paranamer-ant-2.2.jar:/Users/tom/.ivy2/cache/com.thoughtworks.paranamer/paranamer-generator/jars/paranamer-generator-2.2.jar:/Users/tom/.ivy2/cache/com.thoughtworks.qdox/qdox/jars/qdox-1.10.1.jar:/Users/tom/.ivy2/cache/asm/asm/jars/asm-3.2.jar:/Users/tom/.ivy2/cache/commons-lang/commons-lang/jars/commons-lang-2.5.jar:/Users/tom/.ivy2/cache/org.apache.hadoop/hadoop-common-test/jars/hadoop-common-test-0.21.0-SNAPSHOT.jar:/Users/tom/.ivy2/cache/org.apache.ftpserver/ftplet-api/bundles/ftplet-api-1.0.0.jar:/Users/tom/.ivy2/cache/org.apache.mina/mina-core/bundles/mina-core-2.0.0-M5.jar:/Users/tom/.ivy2/cache/org.apache.ftpserver/ftpserver-core/bundles/ftpserver-core-1.0.0.jar:/Users/tom/.ivy2/cache/org.apache.ftpserver/ftpserver-deprecated/jars/ftpserver-deprecated-1.0.0-M2.jar:/Users/tom/.ivy2/cache/org.apache.hadoop/hadoop-hdfs/jars/hadoop-hdfs-0.21.0-SNAPSHOT.jar:/Users/tom/.ivy2/cache/log4j/log4j/jars/log4j-1.2.15.jar:/Users/tom/.ivy2/cache/org.slf4j/slf4j-log4j12/jars/slf4j-log4j12-1.5.11.jar:/Users/tom/.ivy2/cache/org.aspectj/aspectjrt/jars/aspectjrt-1.6.5.jar:/Users/tom/.ivy2/cache/org.aspectj/aspectjtools/jars/aspectjtools-1.6.5.jar:/Users/tom/.ivy2/cache/jdiff/jdiff/jars/jdiff-1.0.9.jar:/Users/tom/.ivy2/cache/xerces/xerces/jars/xerces-1.4.4.jar:/usr/share/ant/lib/ant-launcher.jar:/Users/tom/.ant/lib/ivy.jar:/usr/share/ant/lib/ant-antlr.jar:/usr/share/ant
/lib/ant-jai.jar:/usr/share/ant/lib/ant-jmf.jar:/usr/share/ant/lib/ant-junit.jar:/usr/share/ant/lib/ant-nodeps.jar:/usr/share/ant/lib/ant-swing.jar:/usr/share/ant/lib/ant-testutil.jar:/usr/share/ant/lib/ant-trax.jar:/usr/share/ant/lib/ant.jar:/usr/share/ant/lib/ivy-2.1.0.jar:/usr/share/ant/lib/xercesImpl.jar:/usr/share/ant/lib/xml-apis.jar -sourcepath /Users/tom/workspace/hadoop-mapreduce-0.21-committer/src/java:/Users/tom/workspace/hadoop-mapreduce-0.21-committer/src/tools -apidir /Users/tom/workspace/hadoop-mapreduce-0.21-committer/lib/jdiff -apiname hadoop-mapred 0.21.0 -->
|
|
<package name="org.apache.hadoop.filecache">
|
|
<!-- start class org.apache.hadoop.filecache.DistributedCache -->
|
|
<class name="DistributedCache" extends="org.apache.hadoop.mapreduce.filecache.DistributedCache"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use methods on {@link Job}.">
|
|
<constructor name="DistributedCache"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[Distribute application-specific large, read-only files efficiently.
|
|
|
|
<p><code>DistributedCache</code> is a facility provided by the Map-Reduce
|
|
framework to cache files (text, archives, jars etc.) needed by applications.
|
|
</p>
|
|
|
|
<p>Applications specify the files, via urls (hdfs:// or http://) to be cached
|
|
via the {@link org.apache.hadoop.mapred.JobConf}. The
|
|
<code>DistributedCache</code> assumes that the files specified via urls are
|
|
already present on the {@link FileSystem} at the path specified by the url
|
|
and are accessible by every machine in the cluster.</p>
|
|
|
|
<p>The framework will copy the necessary files on to the slave node before
|
|
any tasks for the job are executed on that node. Its efficiency stems from
|
|
the fact that the files are only copied once per job and the ability to
|
|
cache archives which are un-archived on the slaves.</p>
|
|
|
|
<p><code>DistributedCache</code> can be used to distribute simple, read-only
|
|
data/text files and/or more complex types such as archives, jars etc.
|
|
Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes.
|
|
Jars may be optionally added to the classpath of the tasks, a rudimentary
|
|
software distribution mechanism. Files have execution permissions.
|
|
Optionally users can also direct it to symlink the distributed cache file(s)
|
|
into the working directory of the task.</p>
|
|
|
|
<p><code>DistributedCache</code> tracks modification timestamps of the cache
|
|
files. Clearly the cache files should not be modified by the application
|
|
or externally while the job is executing.</p>
|
|
|
|
<p>Here is an illustrative example on how to use the
|
|
<code>DistributedCache</code>:</p>
|
|
<p><blockquote><pre>
|
|
// Setting up the cache for the application
|
|
|
|
1. Copy the requisite files to the <code>FileSystem</code>:
|
|
|
|
$ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat
|
|
$ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip
|
|
$ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
|
|
$ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
|
|
$ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
|
|
$ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
|
|
|
|
2. Setup the application's <code>JobConf</code>:
|
|
|
|
JobConf job = new JobConf();
|
|
DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"),
|
|
job);
|
|
DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
|
|
DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
|
|
DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
|
|
DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
|
|
DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
|
|
|
|
3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
|
|
or {@link org.apache.hadoop.mapred.Reducer}:
|
|
|
|
public static class MapClass extends MapReduceBase
|
|
implements Mapper<K, V, K, V> {
|
|
|
|
private Path[] localArchives;
|
|
private Path[] localFiles;
|
|
|
|
public void configure(JobConf job) {
|
|
// Get the cached archives/files
|
|
localArchives = DistributedCache.getLocalCacheArchives(job);
|
|
localFiles = DistributedCache.getLocalCacheFiles(job);
|
|
}
|
|
|
|
public void map(K key, V value,
|
|
OutputCollector<K, V> output, Reporter reporter)
|
|
throws IOException {
|
|
// Use data from the cached archives/files here
|
|
// ...
|
|
// ...
|
|
output.collect(key, value);
|
|
}
|
|
}
|
|
|
|
</pre></blockquote></p>
|
|
|
|
It is also very common to use the DistributedCache by using
|
|
{@link org.apache.hadoop.util.GenericOptionsParser}.
|
|
|
|
This class includes methods that should be used by users
|
|
(specifically those mentioned in the example above, as well
|
|
as {@link DistributedCache#addArchiveToClassPath(Path, Configuration)}),
|
|
as well as methods intended for use by the MapReduce framework
|
|
(e.g., {@link org.apache.hadoop.mapred.JobClient}). For implementation
|
|
details, see {@link TrackerDistributedCacheManager} and
|
|
{@link TaskDistributedCacheManager}.
|
|
|
|
@see org.apache.hadoop.mapred.JobConf
|
|
@see org.apache.hadoop.mapred.JobClient
|
|
@see org.apache.hadoop.mapreduce.Job
|
|
@deprecated Use methods on {@link Job}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.filecache.DistributedCache -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred">
|
|
<!-- start class org.apache.hadoop.mapred.ClusterStatus -->
|
|
<class name="ClusterStatus" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<method name="getTaskTrackers" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of task trackers in the cluster.
|
|
|
|
@return the number of task trackers in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getActiveTrackerNames" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the names of active task trackers in the cluster.
|
|
|
|
@return the active task trackers in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getBlacklistedTrackerNames" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the names of blacklisted task trackers in the cluster.
|
|
|
|
@return the blacklisted task trackers in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getBlacklistedTrackers" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of blacklisted task trackers in the cluster.
|
|
|
|
@return the number of blacklisted task trackers in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumExcludedNodes" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of excluded hosts in the cluster.
|
|
@return the number of excluded hosts in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTTExpiryInterval" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the tasktracker expiry interval for the cluster
|
|
@return the expiry interval in msec]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of currently running map tasks in the cluster.
|
|
|
|
@return the number of currently running map tasks in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReduceTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of currently running reduce tasks in the cluster.
|
|
|
|
@return the number of currently running reduce tasks in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxMapTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the maximum capacity for running map tasks in the cluster.
|
|
|
|
@return the maximum capacity for running map tasks in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxReduceTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the maximum capacity for running reduce tasks in the cluster.
|
|
|
|
@return the maximum capacity for running reduce tasks in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobTrackerState" return="org.apache.hadoop.mapred.JobTracker.State"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the current state of the <code>JobTracker</code>,
|
|
as {@link JobTracker.State}
|
|
|
|
@return the current state of the <code>JobTracker</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUsedMemory" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the total heap memory used by the <code>JobTracker</code>
|
|
|
|
@return the size of heap memory used by the <code>JobTracker</code>]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxMemory" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the maximum configured heap memory that can be used by the <code>JobTracker</code>
|
|
|
|
@return the configured size of max heap memory that can be used by the <code>JobTracker</code>]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getBlackListedTrackersInfo" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Gets the list of blacklisted trackers along with reasons for blacklisting.
|
|
|
|
@return the collection of {@link BlackListInfo} objects.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Status information on the current state of the Map-Reduce cluster.
|
|
|
|
<p><code>ClusterStatus</code> provides clients with information such as:
|
|
<ol>
|
|
<li>
|
|
Size of the cluster.
|
|
</li>
|
|
<li>
|
|
Name of the trackers.
|
|
</li>
|
|
<li>
|
|
Task capacity of the cluster.
|
|
</li>
|
|
<li>
|
|
The number of currently running map & reduce tasks.
|
|
</li>
|
|
<li>
|
|
State of the <code>JobTracker</code>.
|
|
</li>
|
|
<li>
|
|
Details regarding black listed trackers.
|
|
</li>
|
|
</ol></p>
|
|
|
|
<p>Clients can query for the latest <code>ClusterStatus</code>, via
|
|
{@link JobClient#getClusterStatus()}.</p>
|
|
|
|
@see JobClient]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.ClusterStatus -->
|
|
<!-- start class org.apache.hadoop.mapred.ClusterStatus.BlackListInfo -->
|
|
<class name="ClusterStatus.BlackListInfo" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<method name="getTrackerName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Gets the blacklisted tasktracker's name.
|
|
|
|
@return tracker's name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReasonForBlackListing" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Gets the reason for which the tasktracker was blacklisted.
|
|
|
|
@return the reason for which the tracker was blacklisted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getBlackListReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Gets a descriptive report about why the tasktracker was blacklisted.
|
|
|
|
@return report describing why the tasktracker was blacklisted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Class which encapsulates information about a blacklisted tasktracker.
|
|
|
|
The information includes the tasktracker's name and reasons for
|
|
getting blacklisted. The toString method of the class will print
|
|
the information in a whitespace separated fashion to enable parsing.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.ClusterStatus.BlackListInfo -->
|
|
<!-- start class org.apache.hadoop.mapred.Counters -->
|
|
<class name="Counters" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<implements name="java.lang.Iterable"/>
|
|
<constructor name="Counters"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getGroupNames" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the names of all counter classes.
|
|
@return Set of counter group names.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="iterator" return="java.util.Iterator"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getGroup" return="org.apache.hadoop.mapred.Counters.Group"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="groupName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Returns the named counter group, or an empty group if there is none
|
|
with the specified name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Enum"/>
|
|
<doc>
|
|
<![CDATA[Find the counter for the given enum. The same enum will always return the
|
|
same counter.
|
|
@param key the counter key
|
|
@return the matching counter object]]>
|
|
</doc>
|
|
</method>
|
|
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="group" type="java.lang.String"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Find a counter given the group and the name.
|
|
@param group the name of the group
|
|
@param name the internal name of the counter
|
|
@return the counter for that name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="deprecated, no comment">
|
|
<param name="group" type="java.lang.String"/>
|
|
<param name="id" type="int"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Find a counter by using strings
|
|
@param group the name of the group
|
|
@param id the id of the counter within the group (0 to N-1)
|
|
@param name the internal name of the counter
|
|
@return the counter for that name
|
|
@deprecated]]>
|
|
</doc>
|
|
</method>
|
|
<method name="incrCounter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Enum"/>
|
|
<param name="amount" type="long"/>
|
|
<doc>
|
|
<![CDATA[Increments the specified counter by the specified amount, creating it if
|
|
it didn't already exist.
|
|
@param key identifies a counter
|
|
@param amount amount by which counter is to be incremented]]>
|
|
</doc>
|
|
</method>
|
|
<method name="incrCounter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="group" type="java.lang.String"/>
|
|
<param name="counter" type="java.lang.String"/>
|
|
<param name="amount" type="long"/>
|
|
<doc>
|
|
<![CDATA[Increments the specified counter by the specified amount, creating it if
|
|
it didn't already exist.
|
|
@param group the name of the group
|
|
@param counter the internal name of the counter
|
|
@param amount amount by which counter is to be incremented]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Enum"/>
|
|
<doc>
|
|
<![CDATA[Returns current value of the specified counter, or 0 if the counter
|
|
does not exist.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="incrAllCounters"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="org.apache.hadoop.mapred.Counters"/>
|
|
<doc>
|
|
<![CDATA[Increments multiple counters by their amounts in another Counters
|
|
instance.
|
|
@param other the other Counters instance]]>
|
|
</doc>
|
|
</method>
|
|
<method name="sum" return="org.apache.hadoop.mapred.Counters"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="a" type="org.apache.hadoop.mapred.Counters"/>
|
|
<param name="b" type="org.apache.hadoop.mapred.Counters"/>
|
|
<doc>
|
|
<![CDATA[Convenience method for computing the sum of two sets of counters.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="size" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the total number of counters, by summing the number of counters
|
|
in each group.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Write the set of groups.
|
|
The external format is:
|
|
#groups (groupName group)*
|
|
|
|
i.e. the number of groups followed by 0 or more groups, where each
|
|
group is of the form:
|
|
|
|
groupDisplayName #counters (false | true counter)*
|
|
|
|
where each counter is of the form:
|
|
|
|
name (false | true displayName) value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read a set of groups.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="log"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="log" type="org.apache.commons.logging.Log"/>
|
|
<doc>
|
|
<![CDATA[Logs the current counter values.
|
|
@param log The log to use.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return textual representation of the counter values.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="makeCompactString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Convert a counters object into a single line that is easy to parse.
|
|
@return the string with "name=value" for each counter and separated by ","]]>
|
|
</doc>
|
|
</method>
|
|
<method name="makeEscapedCompactString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Represent the counter in a textual format that can be converted back to
|
|
its object form
|
|
@return the string in the following format
|
|
{(groupname)(group-displayname)[(countername)(displayname)(value)][][]}{}{}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="fromEscapedCompactString" return="org.apache.hadoop.mapred.Counters"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="compactString" type="java.lang.String"/>
|
|
<exception name="ParseException" type="java.text.ParseException"/>
|
|
<doc>
|
|
<![CDATA[Convert a stringified counter representation into a counter object. Note
|
|
that the counter can be recovered if it is stringified using
|
|
{@link #makeEscapedCompactString()}.
|
|
@return a Counters object]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="obj" type="java.lang.Object"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A set of named counters.
|
|
|
|
<p><code>Counters</code> represent global counters, defined either by the
|
|
Map-Reduce framework or applications. Each <code>Counter</code> can be of
|
|
any {@link Enum} type.</p>
|
|
|
|
<p><code>Counters</code> are bunched into {@link Group}s, each comprising
|
|
counters from a particular <code>Enum</code> class.</p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.Counters -->
|
|
<!-- start class org.apache.hadoop.mapred.Counters.Counter -->
|
|
<class name="Counters.Counter" extends="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="setDisplayName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="newName" type="java.lang.String"/>
|
|
</method>
|
|
<method name="makeEscapedCompactString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the compact stringified version of the counter in the format
|
|
[(actual-name)(display-name)(value)]]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[What is the current value of this counter?
|
|
@return the current value]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A counter record, comprising its name and value.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.Counters.Counter -->
|
|
<!-- start class org.apache.hadoop.mapred.Counters.Group -->
|
|
<class name="Counters.Group" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<implements name="java.lang.Iterable"/>
|
|
<method name="getName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns raw name of the group. This is the name of the enum class
|
|
for this group of counters.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDisplayName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns localized name of the group. This is the same as getName() by
|
|
default, but different if an appropriate ResourceBundle is found.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setDisplayName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="displayName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the display name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="makeEscapedCompactString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the compact stringified version of the group in the format
|
|
{(actual-name)(display-name)(value)[][][]} where [] are compact strings for the
|
|
counters within.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="obj" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Checks for (content) equality of Groups]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="counterName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Returns the value of the specified counter, or 0 if the counter does
|
|
not exist.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="use {@link #getCounter(String)} instead">
|
|
<param name="id" type="int"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Get the counter for the given id and create it if it doesn't exist.
|
|
@param id the numeric id of the counter within the group
|
|
@param name the internal counter name
|
|
@return the counter
|
|
@deprecated use {@link #getCounter(String)} instead]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounterForName" return="org.apache.hadoop.mapred.Counters.Counter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Get the counter for the given name and create it if it doesn't exist.
|
|
@param name the internal counter name
|
|
@return the counter]]>
|
|
</doc>
|
|
</method>
|
|
<method name="size" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the number of counters in this group.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="iterator" return="java.util.Iterator"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>Group</code> of counters, comprising counters from a particular
|
|
counter {@link Enum} class.
|
|
|
|
<p><code>Group</code> handles localization of the class name and the
|
|
counter names.</p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.Counters.Group -->
|
|
<!-- start class org.apache.hadoop.mapred.FileAlreadyExistsException -->
|
|
<class name="FileAlreadyExistsException" extends="java.io.IOException"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FileAlreadyExistsException"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="FileAlreadyExistsException" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[Used when target file already exists for any operation and
|
|
is not configured to be overwritten.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.FileAlreadyExistsException -->
|
|
<!-- start class org.apache.hadoop.mapred.FileInputFormat -->
|
|
<class name="FileInputFormat" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.InputFormat"/>
|
|
<constructor name="FileInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setMinSplitSize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="minSplitSize" type="long"/>
|
|
</method>
|
|
<method name="isSplitable" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="filename" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Is the given filename splitable? Usually, true, but if the file is
|
|
stream compressed, it will not be.
|
|
|
|
<code>FileInputFormat</code> implementations can override this and return
|
|
<code>false</code> to ensure that individual input files are never split-up
|
|
so that {@link Mapper}s process entire files.
|
|
|
|
@param fs the file system that the file is on
|
|
@param filename the file name to check
|
|
@return is this file splitable?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setInputPathFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="filter" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job.
|
|
|
|
@param filter the PathFilter class used for filtering the input paths.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get a PathFilter instance of the filter set for the input paths.
|
|
|
|
@return the PathFilter instance set for the job, NULL if none has been set.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addInputPathRecursively"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="result" type="java.util.List"/>
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="inputFilter" type="org.apache.hadoop.fs.PathFilter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add files in the input path recursively into the results.
|
|
@param result
|
|
The List to store all files.
|
|
@param fs
|
|
The FileSystem.
|
|
@param path
|
|
The input path.
|
|
@param inputFilter
|
|
The input filter that can be used to filter files/dirs.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[List input directories.
|
|
Subclasses may override to, e.g., select only files matching a regular
|
|
expression.
|
|
|
|
@param job the job to list input paths for
|
|
@return array of FileStatus objects
|
|
@throws IOException if zero items.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="makeSplit" return="org.apache.hadoop.mapred.FileSplit"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="file" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="start" type="long"/>
|
|
<param name="length" type="long"/>
|
|
<param name="hosts" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[A factory that makes the split for this class. It can be overridden
|
|
by sub-classes to make sub-types]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="numSplits" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Splits files returned by {@link #listStatus(JobConf)} when
|
|
they're too big.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="computeSplitSize" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="goalSize" type="long"/>
|
|
<param name="minSize" type="long"/>
|
|
<param name="blockSize" type="long"/>
|
|
</method>
|
|
<method name="getBlockIndex" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/>
|
|
<param name="offset" type="long"/>
|
|
</method>
|
|
<method name="setInputPaths"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="commaSeparatedPaths" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Sets the given comma separated paths as the list of inputs
|
|
for the map-reduce job.
|
|
|
|
@param conf Configuration of the job
|
|
@param commaSeparatedPaths Comma separated paths to be set as
|
|
the list of inputs for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addInputPaths"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="commaSeparatedPaths" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Add the given comma separated paths to the list of inputs for
|
|
the map-reduce job.
|
|
|
|
@param conf The configuration of the job
|
|
@param commaSeparatedPaths Comma separated paths to be added to
|
|
the list of inputs for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInputPaths"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/>
|
|
<doc>
|
|
<![CDATA[Set the array of {@link Path}s as the list of inputs
|
|
for the map-reduce job.
|
|
|
|
@param conf Configuration of the job.
|
|
@param inputPaths the {@link Path}s of the input directories/files
|
|
for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addInputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job.
|
|
|
|
@param conf The configuration of the job
|
|
@param path {@link Path} to be added to the list of inputs for
|
|
the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputPaths" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the list of input {@link Path}s for the map-reduce job.
|
|
|
|
@param conf The configuration of the job
|
|
@return the list of input {@link Path}s for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplitHosts" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/>
|
|
<param name="offset" type="long"/>
|
|
<param name="splitSize" type="long"/>
|
|
<param name="clusterMap" type="org.apache.hadoop.net.NetworkTopology"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This function identifies and returns the hosts that contribute
|
|
most for a given split. For calculating the contribution, rack
|
|
locality is treated on par with host locality, so hosts from racks
|
|
that contribute the most are preferred over hosts on racks that
|
|
contribute less.
|
|
@param blkLocations The list of block locations
|
|
@param offset
|
|
@param splitSize
|
|
@return array of hosts that contribute most to this split
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="NUM_INPUT_FILES" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A base class for file-based {@link InputFormat}.
|
|
|
|
<p><code>FileInputFormat</code> is the base class for all file-based
|
|
<code>InputFormat</code>s. This provides a generic implementation of
|
|
{@link #getSplits(JobConf, int)}.
|
|
Subclasses of <code>FileInputFormat</code> can also override the
|
|
{@link #isSplitable(FileSystem, Path)} method to ensure input-files are
|
|
not split-up and are processed as a whole by {@link Mapper}s.
|
|
</p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.FileInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.FileOutputCommitter -->
|
|
<class name="FileOutputCommitter" extends="org.apache.hadoop.mapred.OutputCommitter"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FileOutputCommitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setupJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="commitJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="cleanupJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="abortJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
|
|
<param name="runState" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setupTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="commitTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="abortTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="needsTaskCommit" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="TEMP_DIR_NAME" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Temporary directory name]]>
|
|
</doc>
|
|
</field>
|
|
<field name="SUCCEEDED_FILE_NAME" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[An {@link OutputCommitter} that commits files specified
|
|
in job output directory i.e. ${mapreduce.output.fileoutputformat.outputdir}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.FileOutputCommitter -->
|
|
<!-- start class org.apache.hadoop.mapred.FileOutputFormat -->
|
|
<class name="FileOutputFormat" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
|
|
<constructor name="FileOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setCompressOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="compress" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the output of the job is compressed.
|
|
@param conf the {@link JobConf} to modify
|
|
@param compress should the output of the job be compressed?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCompressOutput" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Is the job output compressed?
|
|
@param conf the {@link JobConf} to look in
|
|
@return <code>true</code> if the job output should be compressed,
|
|
<code>false</code> otherwise]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputCompressorClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="codecClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs.
|
|
@param conf the {@link JobConf} to modify
|
|
@param codecClass the {@link CompressionCodec} to be used to
|
|
compress the job outputs]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputCompressorClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="defaultValue" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link CompressionCodec} for compressing the job outputs.
|
|
@param conf the {@link JobConf} to look in
|
|
@param defaultValue the {@link CompressionCodec} to return if not set
|
|
@return the {@link CompressionCodec} to be used to compress the
|
|
job outputs
|
|
@throws IllegalArgumentException if the class was specified, but not found]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="FileAlreadyExistsException" type="org.apache.hadoop.mapred.FileAlreadyExistsException"/>
|
|
<exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setOutputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="outputDir" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Path} of the output directory for the map-reduce job.
|
|
|
|
@param conf The configuration of the job.
|
|
@param outputDir the {@link Path} of the output directory for
|
|
the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Path} to the output directory for the map-reduce job.
|
|
|
|
@return the {@link Path} to the output directory for the map-reduce job.
|
|
@see FileOutputFormat#getWorkOutputPath(JobConf)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Path} to the task's temporary output directory
|
|
for the map-reduce job
|
|
|
|
<h4 id="SideEffectFiles">Tasks' Side-Effect Files</h4>
|
|
|
|
<p><i>Note:</i> The following is valid only if the {@link OutputCommitter}
|
|
is {@link FileOutputCommitter}. If <code>OutputCommitter</code> is not
|
|
a <code>FileOutputCommitter</code>, the task's temporary output
|
|
directory is same as {@link #getOutputPath(JobConf)} i.e.
|
|
<tt>${mapreduce.output.fileoutputformat.outputdir}</tt></p>
|
|
|
|
<p>Some applications need to create/write-to side-files, which differ from
|
|
the actual job-outputs.</p>
|
|
|
|
<p>In such cases there could be issues with 2 instances of the same TIP
|
|
(running simultaneously e.g. speculative tasks) trying to open/write-to the
|
|
same file (path) on HDFS. Hence the application-writer will have to pick
|
|
unique names per task-attempt (e.g. using the attemptid, say
|
|
<tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p>
|
|
|
|
<p>To get around this the Map-Reduce framework helps the application-writer
|
|
out by maintaining a special
|
|
<tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt>
|
|
sub-directory for each task-attempt on HDFS where the output of the
|
|
task-attempt goes. On successful completion of the task-attempt the files
|
|
in the <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> (only)
|
|
are <i>promoted</i> to <tt>${mapreduce.output.fileoutputformat.outputdir}</tt>. Of course, the
|
|
framework discards the sub-directory of unsuccessful task-attempts. This
|
|
is completely transparent to the application.</p>
|
|
|
|
<p>The application-writer can take advantage of this by creating any
|
|
side-files required in <tt>${mapreduce.task.output.dir}</tt> during execution
|
|
of the reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the
|
|
framework will move them out similarly - thus the application-writer doesn't have to pick
|
|
unique paths per task-attempt.</p>
|
|
|
|
<p><i>Note</i>: the value of <tt>${mapreduce.task.output.dir}</tt> during
|
|
execution of a particular task-attempt is actually
|
|
<tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt>, and this value is
|
|
set by the map-reduce framework. So, just create any side-files in the
|
|
path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce
|
|
task to take advantage of this feature.</p>
|
|
|
|
<p>The entire discussion holds true for maps of jobs with
|
|
reducer=NONE (i.e. 0 reduces) since output of the map, in that case,
|
|
goes directly to HDFS.</p>
|
|
|
|
@return the {@link Path} to the task's temporary output directory
|
|
for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskOutputPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Helper function to create the task's temporary output directory and
|
|
return the path to the task's output file.
|
|
|
|
@param conf job-configuration
|
|
@param name temporary task-output filename
|
|
@return path to the task's temporary output file
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUniqueName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Helper function to generate a name that is unique for the task.
|
|
|
|
<p>The generated name can be used to create custom files from within the
|
|
different tasks for the job, the names for different tasks will not collide
|
|
with each other.</p>
|
|
|
|
<p>The given name is postfixed with the task type, 'm' for maps, 'r' for
|
|
reduces and the task partition number. For example, given a name 'test'
|
|
running on the first map of the job the generated name will be
|
|
'test-m-00000'.</p>
|
|
|
|
@param conf the configuration for the job.
|
|
@param name the name to make unique.
|
|
@return a unique name across all tasks of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPathForCustomFile" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Helper function to generate a {@link Path} for a file that is unique for
|
|
the task within the job output directory.
|
|
|
|
<p>The path can be used to create custom files from within the map and
|
|
reduce tasks. The path name will be unique for each task. The path parent
|
|
will be the job output directory.</p>
|
|
|
|
<p>This method uses the {@link #getUniqueName} method to make the file name
|
|
unique for the task.</p>
|
|
|
|
@param conf the configuration for the job.
|
|
@param name the name for the file.
|
|
@return a unique path across all tasks of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A base class for {@link OutputFormat}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.FileOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.FileSplit -->
|
|
<class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.InputSplit"/>
|
|
<constructor name="FileSplit"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, org.apache.hadoop.mapred.JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="deprecated, no comment">
|
|
<doc>
|
|
<![CDATA[Constructs a split.
|
|
@deprecated
|
|
@param file the file name
|
|
@param start the position of the first byte in the file to process
|
|
@param length the number of bytes in the file to process]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a split with host information
|
|
|
|
@param file the file name
|
|
@param start the position of the first byte in the file to process
|
|
@param length the number of bytes in the file to process
|
|
@param hosts the list of hosts containing the block, possibly null]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="FileSplit" type="org.apache.hadoop.mapreduce.lib.input.FileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The file containing this split's data.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getStart" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The position of the first byte in the file to process.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The number of bytes in the file to process.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A section of an input file. Returned by {@link
|
|
InputFormat#getSplits(JobConf, int)} and passed to
|
|
{@link InputFormat#getRecordReader(InputSplit,JobConf,Reporter)}.
|
|
]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.FileSplit -->
|
|
<!-- start class org.apache.hadoop.mapred.ID -->
|
|
<class name="ID" extends="org.apache.hadoop.mapreduce.ID"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ID" type="int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[constructs an ID object from the given int]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="ID"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[A general identifier, which internally stores the id
|
|
as an integer. This is the super class of {@link JobID},
|
|
{@link TaskID} and {@link TaskAttemptID}.
|
|
|
|
@see JobID
|
|
@see TaskID
|
|
@see TaskAttemptID]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.ID -->
|
|
<!-- start interface org.apache.hadoop.mapred.InputFormat -->
|
|
<interface name="InputFormat" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="numSplits" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Logically split the set of input files for the job.
|
|
|
|
<p>Each {@link InputSplit} is then assigned to an individual {@link Mapper}
|
|
for processing.</p>
|
|
|
|
<p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the
|
|
input files are not physically split into chunks. For e.g. a split could
|
|
be <i>&lt;input-file-path, start, offset&gt;</i> tuple.</p>
|
|
|
|
@param job job configuration.
|
|
@param numSplits the desired number of splits, a hint.
|
|
@return an array of {@link InputSplit}s for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link RecordReader} for the given {@link InputSplit}.
|
|
|
|
<p>It is the responsibility of the <code>RecordReader</code> to respect
|
|
record boundaries while processing the logical split to present a
|
|
record-oriented view to the individual task.</p>
|
|
|
|
@param split the {@link InputSplit}
|
|
@param job the job that this split belongs to
|
|
@return a {@link RecordReader}]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>InputFormat</code> describes the input-specification for a
|
|
Map-Reduce job.
|
|
|
|
<p>The Map-Reduce framework relies on the <code>InputFormat</code> of the
|
|
job to:</p>
|
|
<ol>
|
|
<li>
|
|
Validate the input-specification of the job.</li>
|
|
<li>
|
|
Split-up the input file(s) into logical {@link InputSplit}s, each of
|
|
which is then assigned to an individual {@link Mapper}.
|
|
</li>
|
|
<li>
|
|
Provide the {@link RecordReader} implementation to be used to glean
|
|
input records from the logical <code>InputSplit</code> for processing by
|
|
the {@link Mapper}.
|
|
</li>
|
|
</ol>
|
|
|
|
<p>The default behavior of file-based {@link InputFormat}s, typically
|
|
sub-classes of {@link FileInputFormat}, is to split the
|
|
input into <i>logical</i> {@link InputSplit}s based on the total size, in
|
|
bytes, of the input files. However, the {@link FileSystem} blocksize of
|
|
the input files is treated as an upper bound for input splits. A lower bound
|
|
on the split size can be set via
|
|
<a href="{@docRoot}/../mapred-default.html#mapreduce.input.fileinputformat.split.minsize">
|
|
mapreduce.input.fileinputformat.split.minsize</a>.</p>
|
|
|
|
<p>Clearly, logical splits based on input-size are insufficient for many
|
|
applications since record boundaries are to be respected. In such cases, the
|
|
application has to also implement a {@link RecordReader} on whom lies the
|
|
responsibility to respect record-boundaries and present a record-oriented
|
|
view of the logical <code>InputSplit</code> to the individual task.
|
|
|
|
@see InputSplit
|
|
@see RecordReader
|
|
@see JobClient
|
|
@see FileInputFormat]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.InputFormat -->
|
|
<!-- start interface org.apache.hadoop.mapred.InputSplit -->
|
|
<interface name="InputSplit" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the total number of bytes in the data of the <code>InputSplit</code>.
|
|
|
|
@return the number of bytes in the input split.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the list of hostnames where the input split is located.
|
|
|
|
@return list of hostnames where data of the <code>InputSplit</code> is
|
|
located as an array of <code>String</code>s.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>InputSplit</code> represents the data to be processed by an
|
|
individual {@link Mapper}.
|
|
|
|
<p>Typically, it presents a byte-oriented view on the input and is the
|
|
responsibility of {@link RecordReader} of the job to process this and present
|
|
a record-oriented view.
|
|
|
|
@see InputFormat
|
|
@see RecordReader]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.InputSplit -->
|
|
<!-- start class org.apache.hadoop.mapred.InvalidFileTypeException -->
|
|
<class name="InvalidFileTypeException" extends="java.io.IOException"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InvalidFileTypeException"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="InvalidFileTypeException" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[Used when file type differs from the desired file type, like
|
|
getting a file when a directory is expected. Or a wrong file type.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.InvalidFileTypeException -->
|
|
<!-- start class org.apache.hadoop.mapred.InvalidInputException -->
|
|
<class name="InvalidInputException" extends="java.io.IOException"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InvalidInputException" type="java.util.List"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create the exception with the given list.
|
|
@param probs the list of problems to report. This list is not copied.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getProblems" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the complete list of the problems reported.
|
|
@return the list of problems, which must not be modified]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMessage" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get a summary message of the problems found.
|
|
@return the concatenated messages from all of the problems.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class wraps a list of problems with the input, so that the user
|
|
can get a list of problems together instead of finding and fixing them one
|
|
by one.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.InvalidInputException -->
|
|
<!-- start class org.apache.hadoop.mapred.InvalidJobConfException -->
|
|
<class name="InvalidJobConfException" extends="java.io.IOException"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InvalidJobConfException"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="InvalidJobConfException" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[This exception is thrown when jobconf misses some mandatory attributes
|
|
or value of some attributes is invalid.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.InvalidJobConfException -->
|
|
<!-- start class org.apache.hadoop.mapred.IsolationRunner -->
|
|
<class name="IsolationRunner" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="IsolationRunner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Run a single task.
|
|
|
|
@param args the first argument is the task directory]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[IsolationRunner is intended to facilitate debugging by re-running a specific
|
|
task, given left-over task files for a (typically failed) past job.
|
|
Currently, it is limited to re-running map tasks.
|
|
|
|
Users may coerce MapReduce to keep task files around by setting
|
|
mapreduce.task.files.preserve.failedtasks. See mapred_tutorial.xml for more documentation.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.IsolationRunner -->
|
|
<!-- start class org.apache.hadoop.mapred.JobClient -->
|
|
<class name="JobClient" extends="org.apache.hadoop.mapreduce.tools.CLI"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobClient"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a job client.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobClient" type="org.apache.hadoop.mapred.JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Build a job client with the given {@link JobConf}, and connect to the
|
|
default {@link JobTracker}.
|
|
|
|
@param conf the job configuration.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobClient" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Build a job client with the given {@link Configuration},
|
|
and connect to the default {@link JobTracker}.
|
|
|
|
@param conf the configuration.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobClient" type="java.net.InetSocketAddress, org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Build a job client, connect to the indicated job tracker.
|
|
|
|
@param jobTrackAddr the job tracker to connect to.
|
|
@param conf configuration.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="init"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Connect to the default {@link JobTracker}.
|
|
@param conf the job configuration.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close the <code>JobClient</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getFs" return="org.apache.hadoop.fs.FileSystem"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get a filesystem handle. We need this to prepare jobs
|
|
for submission to the MapReduce system.
|
|
|
|
@return the filesystem handle.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getClusterHandle" return="org.apache.hadoop.mapreduce.Cluster"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get a handle to the Cluster]]>
|
|
</doc>
|
|
</method>
|
|
<method name="submitJob" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobFile" type="java.lang.String"/>
|
|
<exception name="FileNotFoundException" type="java.io.FileNotFoundException"/>
|
|
<exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Submit a job to the MR system.
|
|
|
|
This returns a handle to the {@link RunningJob} which can be used to track
|
|
the running-job.
|
|
|
|
@param jobFile the job configuration.
|
|
@return a handle to the {@link RunningJob} which can be used to track the
|
|
running-job.
|
|
@throws FileNotFoundException
|
|
@throws InvalidJobConfException
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="submitJob" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="FileNotFoundException" type="java.io.FileNotFoundException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Submit a job to the MR system.
|
|
This returns a handle to the {@link RunningJob} which can be used to track
|
|
the running-job.
|
|
|
|
@param conf the job configuration.
|
|
@return a handle to the {@link RunningJob} which can be used to track the
|
|
running-job.
|
|
@throws FileNotFoundException
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJob" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get a {@link RunningJob} object to track an ongoing job. Returns
|
|
null if the id does not correspond to any known job.
|
|
|
|
@param jobid the jobid of the job.
|
|
@return the {@link RunningJob} handle to track the job, null if the
|
|
<code>jobid</code> doesn't correspond to any known job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJob" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Applications should rather use {@link #getJob(JobID)}.">
|
|
<param name="jobid" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@deprecated Applications should rather use {@link #getJob(JobID)}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the information of the current state of the map tasks of a job.
|
|
|
|
@param jobId the job to query.
|
|
@return the list of all of the map tips.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Applications should rather use {@link #getMapTaskReports(JobID)}">
|
|
<param name="jobId" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@deprecated Applications should rather use {@link #getMapTaskReports(JobID)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the information of the current state of the reduce tasks of a job.
|
|
|
|
@param jobId the job to query.
|
|
@return the list of all of the reduce tips.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCleanupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the information of the current state of the cleanup tasks of a job.
|
|
|
|
@param jobId the job to query.
|
|
@return the list of all of the cleanup tips.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSetupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the information of the current state of the setup tasks of a job.
|
|
|
|
@param jobId the job to query.
|
|
@return the list of all of the setup tips.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Applications should rather use {@link #getReduceTaskReports(JobID)}">
|
|
<param name="jobId" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@deprecated Applications should rather use {@link #getReduceTaskReports(JobID)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="displayTasks"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<param name="type" type="java.lang.String"/>
|
|
<param name="state" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Display the information about a job's tasks, of a particular type and
|
|
in a particular state
|
|
|
|
@param jobId the ID of the job
|
|
@param type the type of the task (map/reduce/setup/cleanup)
|
|
@param state the state of the task
|
|
(pending/running/completed/failed/killed)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get status information about the Map-Reduce cluster.
|
|
|
|
@return the status information about the Map-Reduce cluster as an object
|
|
of {@link ClusterStatus}.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="detailed" type="boolean"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get status information about the Map-Reduce cluster.
|
|
|
|
@param detailed if true then get a detailed status including the
|
|
tracker names
|
|
@return the status information about the Map-Reduce cluster as an object
|
|
of {@link ClusterStatus}.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="jobsToComplete" return="org.apache.hadoop.mapred.JobStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the jobs that are not completed and not failed.
|
|
|
|
@return array of {@link JobStatus} for the running/to-be-run jobs.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getAllJobs" return="org.apache.hadoop.mapred.JobStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the jobs that are submitted.
|
|
|
|
@return array of {@link JobStatus} for the submitted jobs.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="runJob" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Utility that submits a job, then polls for progress until the job is
|
|
complete.
|
|
|
|
@param job the job configuration.
|
|
@throws IOException if the job fails]]>
|
|
</doc>
|
|
</method>
|
|
<method name="monitorAndPrintJob" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.RunningJob"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Monitor a job and print status in real-time as progress is made and tasks
|
|
fail.
|
|
@param conf the job's configuration
|
|
@param job the job to track
|
|
@return true if the job succeeded
|
|
@throws IOException if communication to the JobTracker fails]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskOutputFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/>
|
|
<doc>
|
|
<![CDATA[Sets the output filter for tasks. Only those tasks are printed whose
|
|
output matches the filter.
|
|
@param newValue task filter.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the task output filter out of the JobConf.
|
|
|
|
@param job the JobConf to examine.
|
|
@return the filter level.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskOutputFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/>
|
|
<doc>
|
|
<![CDATA[Modify the JobConf to set the task output filter.
|
|
|
|
@param job the JobConf to modify.
|
|
@param newValue the value to set.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns task output filter.
|
|
@return task filter.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="cntrs" type="org.apache.hadoop.mapreduce.Counters"/>
|
|
<param name="counterGroupName" type="java.lang.String"/>
|
|
<param name="counterName" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getDefaultMaps" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get status information about the max available Maps in the cluster.
|
|
|
|
@return the max available Maps in the cluster
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDefaultReduces" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get status information about the max available Reduces in the cluster.
|
|
|
|
@return the max available Reduces in the cluster
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSystemDir" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Grab the jobtracker system directory path where job-specific files are to be placed.
|
|
|
|
@return the system directory where job-specific files are to be placed.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRootQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Returns an array of queue information objects about root level queues
|
|
configured
|
|
|
|
@return the array of root level JobQueueInfo objects
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getChildQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="queueName" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Returns an array of queue information objects about immediate children
|
|
of queue queueName.
|
|
|
|
@param queueName
|
|
@return the array of immediate children JobQueueInfo objects
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Return an array of queue information objects about all the Job Queues
|
|
configured.
|
|
|
|
@return Array of JobQueueInfo objects
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobsFromQueue" return="org.apache.hadoop.mapred.JobStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="queueName" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets all the jobs which were added to a particular Job Queue
|
|
|
|
@param queueName name of the Job Queue
|
|
@return Array of jobs present in the job queue
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getQueueInfo" return="org.apache.hadoop.mapred.JobQueueInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="queueName" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets the queue information associated with a particular Job Queue
|
|
|
|
@param queueName name of the job queue.
|
|
@return Queue information associated with the particular queue.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getQueueAclsForCurrentUser" return="org.apache.hadoop.mapred.QueueAclsInfo[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets the Queue ACLs for current user
|
|
@return array of QueueAclsInfo object for current user.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDelegationToken" return="org.apache.hadoop.security.token.Token"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="renewer" type="org.apache.hadoop.io.Text"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get a delegation token for the user from the JobTracker.
|
|
@param renewer the user who can renew the token
|
|
@return the new token
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="renewDelegationToken" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="token" type="org.apache.hadoop.security.token.Token"/>
|
|
<exception name="SecretManager.InvalidToken" type="org.apache.hadoop.security.token.SecretManager.InvalidToken"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Renew a delegation token
|
|
@param token the token to renew
|
|
@return the new expiration time of the token
|
|
@throws InvalidToken
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cancelDelegationToken"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="token" type="org.apache.hadoop.security.token.Token"/>
|
|
<exception name="SecretManager.InvalidToken" type="org.apache.hadoop.security.token.SecretManager.InvalidToken"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Cancel a delegation token from the JobTracker
|
|
@param token the token to cancel
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="argv" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>JobClient</code> is the primary interface for the user-job to interact
|
|
with the {@link JobTracker}.
|
|
|
|
<code>JobClient</code> provides facilities to submit jobs, track their
|
|
progress, access component-tasks' reports/logs, get the Map-Reduce cluster
|
|
status information etc.
|
|
|
|
<p>The job submission process involves:
|
|
<ol>
|
|
<li>
|
|
Checking the input and output specifications of the job.
|
|
</li>
|
|
<li>
|
|
Computing the {@link InputSplit}s for the job.
|
|
</li>
|
|
<li>
|
|
Setting up the requisite accounting information for the {@link DistributedCache}
|
|
of the job, if necessary.
|
|
</li>
|
|
<li>
|
|
Copying the job's jar and configuration to the map-reduce system directory
|
|
on the distributed file-system.
|
|
</li>
|
|
<li>
|
|
Submitting the job to the <code>JobTracker</code> and optionally monitoring
|
|
its status.
|
|
</li>
|
|
</ol></p>
|
|
|
|
Normally the user creates the application, describes various facets of the
|
|
job via {@link JobConf} and then uses the <code>JobClient</code> to submit
|
|
the job and monitor its progress.
|
|
|
|
<p>Here is an example on how to use <code>JobClient</code>:</p>
|
|
<p><blockquote><pre>
|
|
// Create a new JobConf
|
|
JobConf job = new JobConf(new Configuration(), MyJob.class);
|
|
|
|
// Specify various job-specific parameters
|
|
job.setJobName("myjob");
|
|
|
|
job.setInputPath(new Path("in"));
|
|
job.setOutputPath(new Path("out"));
|
|
|
|
job.setMapperClass(MyJob.MyMapper.class);
|
|
job.setReducerClass(MyJob.MyReducer.class);
|
|
|
|
// Submit the job, then poll for progress until the job is complete
|
|
JobClient.runJob(job);
|
|
</pre></blockquote></p>
|
|
|
|
<h4 id="JobControl">Job Control</h4>
|
|
|
|
<p>At times clients would chain map-reduce jobs to accomplish complex tasks
|
|
which cannot be done via a single map-reduce job. This is fairly easy since
|
|
the output of the job, typically, goes to distributed file-system and that
|
|
can be used as the input for the next job.</p>
|
|
|
|
<p>However, this also means that the onus on ensuring jobs are complete
|
|
(success/failure) lies squarely on the clients. In such situations the
|
|
various job-control options are:
|
|
<ol>
|
|
<li>
|
|
{@link #runJob(JobConf)} : submits the job and returns only after
|
|
the job has completed.
|
|
</li>
|
|
<li>
|
|
{@link #submitJob(JobConf)} : only submits the job, then poll the
|
|
returned handle to the {@link RunningJob} to query status and make
|
|
scheduling decisions.
|
|
</li>
|
|
<li>
|
|
{@link JobConf#setJobEndNotificationURI(String)} : sets up a notification
|
|
on job-completion, thus avoiding polling.
|
|
</li>
|
|
</ol></p>
|
|
|
|
@see JobConf
|
|
@see ClusterStatus
|
|
@see Tool
|
|
@see DistributedCache]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobClient -->
|
|
<!-- start class org.apache.hadoop.mapred.JobClient.TaskStatusFilter -->
|
|
<class name="JobClient.TaskStatusFilter" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobClient.TaskStatusFilter -->
|
|
<!-- start class org.apache.hadoop.mapred.JobConf -->
|
|
<class name="JobConf" extends="org.apache.hadoop.conf.Configuration"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a map/reduce job configuration.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobConf" type="java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a map/reduce job configuration.
|
|
|
|
@param exampleClass a class whose containing jar is used as the job's jar.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobConf" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a map/reduce job configuration.
|
|
|
|
@param conf a Configuration whose settings will be inherited.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobConf" type="org.apache.hadoop.conf.Configuration, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a map/reduce job configuration.
|
|
|
|
@param conf a Configuration whose settings will be inherited.
|
|
@param exampleClass a class whose containing jar is used as the job's jar.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobConf" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a map/reduce configuration.
|
|
|
|
@param config a Configuration-format XML job description file.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobConf" type="org.apache.hadoop.fs.Path"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a map/reduce configuration.
|
|
|
|
@param config a Configuration-format XML job description file.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobConf" type="boolean"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[A new map/reduce configuration where the behavior of reading from the
|
|
default resources can be turned off.
|
|
<p/>
|
|
If the parameter {@code loadDefaults} is false, the new instance
|
|
will not load resources from the default files.
|
|
|
|
@param loadDefaults specifies whether to load from the default files]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getJar" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user jar for the map-reduce job.
|
|
|
|
@return the user jar for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJar"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jar" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the user jar for the map-reduce job.
|
|
|
|
@param jar the user jar for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJarUnpackPattern" return="java.util.regex.Pattern"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the pattern for jar contents to unpack on the tasktracker]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJarByClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the job's jar file by finding an example class location.
|
|
|
|
@param cls the example class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocalDirs" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="deleteLocalFiles"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Use MRAsyncDiskService.moveAndDeleteAllVolumes instead.
|
|
@see org.apache.hadoop.mapreduce.util.MRAsyncDiskService#cleanupAllVolumes()]]>
|
|
</doc>
|
|
</method>
|
|
<method name="deleteLocalFiles"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="subdir" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getLocalPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="pathString" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Constructs a local file name. Files are distributed among configured
|
|
local directories.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUser" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the reported username for this job.
|
|
|
|
@return the username]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setUser"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="user" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the reported username for this job.
|
|
|
|
@param user the username for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setKeepFailedTaskFiles"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="keep" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the framework should keep the intermediate files for
|
|
failed tasks.
|
|
|
|
@param keep <code>true</code> if framework should keep the intermediate files
|
|
for failed tasks, <code>false</code> otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getKeepFailedTaskFiles" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Should the temporary files for failed tasks be kept?
|
|
|
|
@return should the files be kept?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setKeepTaskFilesPattern"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="pattern" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set a regular expression for task names that should be kept.
|
|
The regular expression ".*_m_000123_0" would keep the files
|
|
for the first instance of map 123 that ran.
|
|
|
|
@param pattern the java.util.regex.Pattern to match against the
|
|
task names.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getKeepTaskFilesPattern" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the regular expression that is matched against the task names
|
|
to see if we need to keep the files.
|
|
|
|
@return the pattern as a string, if it was set, otherwise null.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setWorkingDirectory"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="dir" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Set the current working directory for the default file system.
|
|
|
|
@param dir the new current working directory.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the current working directory for the default file system.
|
|
|
|
@return the directory name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setNumTasksToExecutePerJvm"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="numTasks" type="int"/>
|
|
<doc>
|
|
<![CDATA[Sets the number of tasks that a spawned task JVM should run
|
|
before it exits
|
|
@param numTasks the number of tasks to execute; defaults to 1;
|
|
-1 signifies no limit]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumTasksToExecutePerJvm" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of tasks that a spawned JVM should execute]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputFormat" return="org.apache.hadoop.mapred.InputFormat"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link InputFormat} implementation for the map-reduce job,
|
|
defaults to {@link TextInputFormat} if not specified explicitly.
|
|
|
|
@return the {@link InputFormat} implementation for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInputFormat"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link InputFormat} implementation for the map-reduce job.
|
|
|
|
@param theClass the {@link InputFormat} implementation for the map-reduce
|
|
job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputFormat" return="org.apache.hadoop.mapred.OutputFormat"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link OutputFormat} implementation for the map-reduce job,
|
|
defaults to {@link TextOutputFormat} if not specified explicitly.
|
|
|
|
@return the {@link OutputFormat} implementation for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapred.OutputCommitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link OutputCommitter} implementation for the map-reduce job,
|
|
defaults to {@link FileOutputCommitter} if not specified explicitly.
|
|
|
|
@return the {@link OutputCommitter} implementation for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputCommitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link OutputCommitter} implementation for the map-reduce job.
|
|
|
|
@param theClass the {@link OutputCommitter} implementation for the map-reduce
|
|
job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputFormat"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link OutputFormat} implementation for the map-reduce job.
|
|
|
|
@param theClass the {@link OutputFormat} implementation for the map-reduce
|
|
job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setCompressMapOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="compress" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Should the map outputs be compressed before transfer?
|
|
Uses the SequenceFile compression.
|
|
|
|
@param compress should the map outputs be compressed?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCompressMapOutput" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Are the outputs of the maps to be compressed?
|
|
|
|
@return <code>true</code> if the outputs of the maps are to be compressed,
|
|
<code>false</code> otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapOutputCompressorClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="codecClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the given class as the {@link CompressionCodec} for the map outputs.
|
|
|
|
@param codecClass the {@link CompressionCodec} class that will compress
|
|
the map outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapOutputCompressorClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="defaultValue" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link CompressionCodec} for compressing the map outputs.
|
|
|
|
@param defaultValue the {@link CompressionCodec} to return if not set
|
|
@return the {@link CompressionCodec} class that should be used to compress the
|
|
map outputs.
|
|
@throws IllegalArgumentException if the class was specified, but not found]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the key class for the map output data. If it is not set, use the
|
|
(final) output key class. This allows the map output key class to be
|
|
different than the final output key class.
|
|
|
|
@return the map output key class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapOutputKeyClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the key class for the map output data. This allows the user to
|
|
specify the map output key class to be different than the final output
|
|
key class.
|
|
|
|
@param theClass the map output key class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the value class for the map output data. If it is not set, use the
|
|
(final) output value class. This allows the map output value class to be
|
|
different than the final output value class.
|
|
|
|
@return the map output value class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapOutputValueClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the value class for the map output data. This allows the user to
|
|
specify the map output value class to be different than the final output
|
|
value class.
|
|
|
|
@param theClass the map output value class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the key class for the job output data.
|
|
|
|
@return the key class for the job output data.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputKeyClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the key class for the job output data.
|
|
|
|
@param theClass the key class for the job output data.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputKeyComparator" return="org.apache.hadoop.io.RawComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link RawComparator} comparator used to compare keys.
|
|
|
|
@return the {@link RawComparator} comparator used to compare keys.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputKeyComparatorClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link RawComparator} comparator used to compare keys.
|
|
|
|
@param theClass the {@link RawComparator} comparator used to
|
|
compare keys.
|
|
@see #setOutputValueGroupingComparator(Class)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setKeyFieldComparatorOptions"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="keySpec" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link KeyFieldBasedComparator} options used to compare keys.
|
|
|
|
@param keySpec the key specification of the form -k pos1[,pos2], where,
|
|
pos is of the form f[.c][opts], where f is the number
|
|
of the key field to use, and c is the number of the first character from
|
|
the beginning of the field. Fields and character positions are numbered
|
|
starting with 1; a character position of zero in pos2 indicates the
|
|
field's last character. If '.c' is omitted from pos1, it defaults to 1
|
|
(the beginning of the field); if omitted from pos2, it defaults to 0
|
|
(the end of the field). opts are ordering options. The supported options
|
|
are:
|
|
-n, (Sort numerically)
|
|
-r, (Reverse the result of comparison)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getKeyFieldComparatorOption" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link KeyFieldBasedComparator} options]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setKeyFieldPartitionerOptions"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="keySpec" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link KeyFieldBasedPartitioner} options used for
|
|
{@link Partitioner}
|
|
|
|
@param keySpec the key specification of the form -k pos1[,pos2], where,
|
|
pos is of the form f[.c][opts], where f is the number
|
|
of the key field to use, and c is the number of the first character from
|
|
the beginning of the field. Fields and character positions are numbered
|
|
starting with 1; a character position of zero in pos2 indicates the
|
|
field's last character. If '.c' is omitted from pos1, it defaults to 1
|
|
(the beginning of the field); if omitted from pos2, it defaults to 0
|
|
(the end of the field).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getKeyFieldPartitionerOption" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link KeyFieldBasedPartitioner} options]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputValueGroupingComparator" return="org.apache.hadoop.io.RawComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user defined {@link RawComparator} comparator for
|
|
grouping keys of inputs to the reduce.
|
|
|
|
@return comparator set by the user for grouping values.
|
|
@see #setOutputValueGroupingComparator(Class) for details.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputValueGroupingComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the user defined {@link RawComparator} comparator for
|
|
grouping keys in the input to the reduce.
|
|
|
|
<p>This comparator should be provided if the equivalence rules for keys
|
|
for sorting the intermediates are different from those for grouping keys
|
|
before each call to
|
|
{@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p>
|
|
|
|
<p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed
|
|
in a single call to the reduce function if K1 and K2 compare as equal.</p>
|
|
|
|
<p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control
|
|
how keys are sorted, this can be used in conjunction to simulate
|
|
<i>secondary sort on values</i>.</p>
|
|
|
|
<p><i>Note</i>: This is not a guarantee of the reduce sort being
|
|
<i>stable</i> in any sense. (In any case, with the order of available
|
|
map-outputs to the reduce being non-deterministic, it wouldn't make
|
|
that much sense.)</p>
|
|
|
|
@param theClass the comparator class to be used for grouping keys.
|
|
It should implement <code>RawComparator</code>.
|
|
@see #setOutputKeyComparatorClass(Class)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUseNewMapper" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Should the framework use the new context-object code for running
|
|
the mapper?
|
|
@return true, if the new api should be used]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setUseNewMapper"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="flag" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the framework should use the new api for the mapper.
|
|
This is the default for jobs submitted with the new Job api.
|
|
@param flag true, if the new api should be used]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUseNewReducer" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Should the framework use the new context-object code for running
|
|
the reducer?
|
|
@return true, if the new api should be used]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setUseNewReducer"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="flag" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the framework should use the new api for the reducer.
|
|
This is the default for jobs submitted with the new Job api.
|
|
@param flag true, if the new api should be used]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the value class for job outputs.
|
|
|
|
@return the value class for job outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputValueClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the value class for job outputs.
|
|
|
|
@param theClass the value class for job outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapperClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link Mapper} class for the job.
|
|
|
|
@return the {@link Mapper} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapperClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Mapper} class for the job.
|
|
|
|
@param theClass the {@link Mapper} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapRunnerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link MapRunnable} class for the job.
|
|
|
|
@return the {@link MapRunnable} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapRunnerClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Expert: Set the {@link MapRunnable} class for the job.
|
|
|
|
Typically used to exert greater control on {@link Mapper}s.
|
|
|
|
@param theClass the {@link MapRunnable} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPartitionerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link Partitioner} used to partition {@link Mapper}-outputs
|
|
to be sent to the {@link Reducer}s.
|
|
|
|
@return the {@link Partitioner} used to partition map-outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setPartitionerClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Partitioner} class used to partition
|
|
{@link Mapper}-outputs to be sent to the {@link Reducer}s.
|
|
|
|
@param theClass the {@link Partitioner} used to partition map-outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReducerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link Reducer} class for the job.
|
|
|
|
@return the {@link Reducer} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setReducerClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Reducer} class for the job.
|
|
|
|
@param theClass the {@link Reducer} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user-defined <i>combiner</i> class used to combine map-outputs
|
|
before being sent to the reducers. Typically the combiner is same as the
|
|
{@link Reducer} for the job i.e. {@link #getReducerClass()}.
|
|
|
|
@return the user-defined combiner class used to combine map-outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setCombinerClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the user-defined <i>combiner</i> class used to combine map-outputs
|
|
before being sent to the reducers.
|
|
|
|
<p>The combiner is an application-specified aggregation operation, which
|
|
can help cut down the amount of data transferred between the
|
|
{@link Mapper} and the {@link Reducer}, leading to better performance.</p>
|
|
|
|
<p>The framework may invoke the combiner 0, 1, or multiple times, in both
|
|
the mapper and reducer tasks. In general, the combiner is called as the
|
|
sort/merge result is written to disk. The combiner must:
|
|
<ul>
|
|
<li> be side-effect free</li>
|
|
<li> have the same input and output key types and the same input and
|
|
output value types</li>
|
|
</ul></p>
|
|
|
|
<p>Typically the combiner is same as the <code>Reducer</code> for the
|
|
job i.e. {@link #setReducerClass(Class)}.</p>
|
|
|
|
@param theClass the user-defined combiner class used to combine
|
|
map-outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSpeculativeExecution" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Should speculative execution be used for this job?
|
|
Defaults to <code>true</code>.
|
|
|
|
@return <code>true</code> if speculative execution should be used for this job,
|
|
<code>false</code> otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSpeculativeExecution"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="speculativeExecution" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Turn speculative execution on or off for this job.
|
|
|
|
@param speculativeExecution <code>true</code> if speculative execution
|
|
should be turned on, else <code>false</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapSpeculativeExecution" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Should speculative execution be used for this job for map tasks?
|
|
Defaults to <code>true</code>.
|
|
|
|
@return <code>true</code> if speculative execution should be
|
|
used for this job for map tasks,
|
|
<code>false</code> otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapSpeculativeExecution"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="speculativeExecution" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Turn speculative execution on or off for this job for map tasks.
|
|
|
|
@param speculativeExecution <code>true</code> if speculative execution
|
|
should be turned on for map tasks,
|
|
else <code>false</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReduceSpeculativeExecution" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Should speculative execution be used for this job for reduce tasks?
|
|
Defaults to <code>true</code>.
|
|
|
|
@return <code>true</code> if speculative execution should be used
|
|
for reduce tasks for this job,
|
|
<code>false</code> otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setReduceSpeculativeExecution"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="speculativeExecution" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Turn speculative execution on or off for this job for reduce tasks.
|
|
|
|
@param speculativeExecution <code>true</code> if speculative execution
|
|
should be turned on for reduce tasks,
|
|
else <code>false</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumMapTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the configured number of map tasks for this job.
|
|
Defaults to <code>1</code>.
|
|
|
|
@return the number of map tasks for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setNumMapTasks"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="n" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the number of map tasks for this job.
|
|
|
|
<p><i>Note</i>: This is only a <i>hint</i> to the framework. The actual
|
|
number of spawned map tasks depends on the number of {@link InputSplit}s
|
|
generated by the job's {@link InputFormat#getSplits(JobConf, int)}.
|
|
|
|
A custom {@link InputFormat} is typically used to accurately control
|
|
the number of map tasks for the job.</p>
|
|
|
|
<h4 id="NoOfMaps">How many maps?</h4>
|
|
|
|
<p>The number of maps is usually driven by the total size of the inputs
|
|
i.e. total number of blocks of the input files.</p>
|
|
|
|
<p>The right level of parallelism for maps seems to be around 10-100 maps
|
|
per-node, although it has been set up to 300 or so for very cpu-light map
|
|
tasks. Task setup takes a while, so it is best if the maps take at least a
|
|
minute to execute.</p>
|
|
|
|
<p>The default behavior of file-based {@link InputFormat}s is to split the
|
|
input into <i>logical</i> {@link InputSplit}s based on the total size, in
|
|
bytes, of input files. However, the {@link FileSystem} blocksize of the
|
|
input files is treated as an upper bound for input splits. A lower bound
|
|
on the split size can be set via
|
|
<a href="{@docRoot}/../mapred-default.html#mapreduce.input.fileinputformat.split.minsize">
|
|
mapreduce.input.fileinputformat.split.minsize</a>.</p>
|
|
|
|
<p>Thus, if you expect 10TB of input data and have a blocksize of 128MB,
|
|
you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is
|
|
used to set it even higher.</p>
|
|
|
|
@param n the number of map tasks for this job.
|
|
@see InputFormat#getSplits(JobConf, int)
|
|
@see FileInputFormat
|
|
@see FileSystem#getDefaultBlockSize()
|
|
@see FileStatus#getBlockSize()]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumReduceTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the configured number of reduce tasks for this job. Defaults to
|
|
<code>1</code>.
|
|
|
|
@return the number of reduce tasks for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setNumReduceTasks"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="n" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the requisite number of reduce tasks for this job.
|
|
|
|
<h4 id="NoOfReduces">How many reduces?</h4>
|
|
|
|
<p>The right number of reduces seems to be <code>0.95</code> or
|
|
<code>1.75</code> multiplied by (&lt;<i>no. of nodes</i>&gt; *
|
|
<a href="{@docRoot}/../mapred-default.html#mapreduce.tasktracker.reduce.tasks.maximum">
|
|
mapreduce.tasktracker.reduce.tasks.maximum</a>).
|
|
</p>
|
|
|
|
<p>With <code>0.95</code> all of the reduces can launch immediately and
|
|
start transferring map outputs as the maps finish. With <code>1.75</code>
|
|
the faster nodes will finish their first round of reduces and launch a
|
|
second wave of reduces doing a much better job of load balancing.</p>
|
|
|
|
<p>Increasing the number of reduces increases the framework overhead, but
|
|
increases load balancing and lowers the cost of failures.</p>
|
|
|
|
<p>The scaling factors above are slightly less than whole numbers to
|
|
reserve a few reduce slots in the framework for speculative-tasks, failures
|
|
etc.</p>
|
|
|
|
<h4 id="ReducerNone">Reducer NONE</h4>
|
|
|
|
<p>It is legal to set the number of reduce-tasks to <code>zero</code>.</p>
|
|
|
|
<p>In this case the outputs of the map-tasks go directly to the distributed
|
|
file-system, to the path set by
|
|
{@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the
|
|
framework doesn't sort the map-outputs before writing them out to HDFS.</p>
|
|
|
|
@param n the number of reduce tasks for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxMapAttempts" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the configured number of maximum attempts that will be made to run a
|
|
map task, as specified by the <code>mapreduce.map.maxattempts</code>
|
|
property. If this property is not already set, the default is 4 attempts.
|
|
|
|
@return the max number of attempts per map task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxMapAttempts"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="n" type="int"/>
|
|
<doc>
|
|
<![CDATA[Expert: Set the number of maximum attempts that will be made to run a
|
|
map task.
|
|
|
|
@param n the number of attempts per map task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxReduceAttempts" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the configured number of maximum attempts that will be made to run a
|
|
reduce task, as specified by the <code>mapreduce.reduce.maxattempts</code>
|
|
property. If this property is not already set, the default is 4 attempts.
|
|
|
|
@return the max number of attempts per reduce task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxReduceAttempts"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="n" type="int"/>
|
|
<doc>
|
|
<![CDATA[Expert: Set the number of maximum attempts that will be made to run a
|
|
reduce task.
|
|
|
|
@param n the number of attempts per reduce task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user-specified job name. This is only used to identify the
|
|
job to the user.
|
|
|
|
@return the job's name, defaulting to "".]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the user-specified job name.
|
|
|
|
@param name the job's new name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSessionId" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user-specified session identifier. The default is the empty string.
|
|
|
|
The session identifier is used to tag metric data that is reported to some
|
|
performance metrics system via the org.apache.hadoop.metrics API. The
|
|
session identifier is intended, in particular, for use by Hadoop-On-Demand
|
|
(HOD) which allocates a virtual Hadoop cluster dynamically and transiently.
|
|
HOD will set the session identifier by modifying the mapred-site.xml file
|
|
before starting the cluster.
|
|
|
|
When not running under HOD, this identifier is expected to remain set to
|
|
the empty string.
|
|
|
|
@return the session identifier, defaulting to "".]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSessionId"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="sessionId" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the user-specified session identifier.
|
|
|
|
@param sessionId the new session id.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxTaskFailuresPerTracker"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="noFailures" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the maximum no. of failures of a given job per tasktracker.
|
|
If the no. of task failures exceeds <code>noFailures</code>, the
|
|
tasktracker is <i>blacklisted</i> for this job.
|
|
|
|
@param noFailures maximum no. of failures of a given job per tasktracker.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxTaskFailuresPerTracker" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Expert: Get the maximum no. of failures of a given job per tasktracker.
|
|
If the no. of task failures exceeds this, the tasktracker is
|
|
<i>blacklisted</i> for this job.
|
|
|
|
@return the maximum no. of failures of a given job per tasktracker.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxMapTaskFailuresPercent" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the maximum percentage of map tasks that can fail without
|
|
the job being aborted.
|
|
|
|
Each map task is executed a minimum of {@link #getMaxMapAttempts()}
|
|
attempts before being declared as <i>failed</i>.
|
|
|
|
Defaults to <code>zero</code>, i.e. <i>any</i> failed map-task results in
|
|
the job being declared as {@link JobStatus#FAILED}.
|
|
|
|
@return the maximum percentage of map tasks that can fail without
|
|
the job being aborted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxMapTaskFailuresPercent"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="percent" type="int"/>
|
|
<doc>
|
|
<![CDATA[Expert: Set the maximum percentage of map tasks that can fail without the
|
|
job being aborted.
|
|
|
|
Each map task is executed a minimum of {@link #getMaxMapAttempts} attempts
|
|
before being declared as <i>failed</i>.
|
|
|
|
@param percent the maximum percentage of map tasks that can fail without
|
|
the job being aborted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxReduceTaskFailuresPercent" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the maximum percentage of reduce tasks that can fail without
|
|
the job being aborted.
|
|
|
|
Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()}
|
|
attempts before being declared as <i>failed</i>.
|
|
|
|
Defaults to <code>zero</code>, i.e. <i>any</i> failed reduce-task results
|
|
in the job being declared as {@link JobStatus#FAILED}.
|
|
|
|
@return the maximum percentage of reduce tasks that can fail without
|
|
the job being aborted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxReduceTaskFailuresPercent"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="percent" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the maximum percentage of reduce tasks that can fail without the job
|
|
being aborted.
|
|
|
|
Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()}
|
|
attempts before being declared as <i>failed</i>.
|
|
|
|
@param percent the maximum percentage of reduce tasks that can fail without
|
|
the job being aborted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobPriority"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="prio" type="org.apache.hadoop.mapred.JobPriority"/>
|
|
<doc>
|
|
<![CDATA[Set {@link JobPriority} for this job.
|
|
|
|
@param prio the {@link JobPriority} for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobPriority" return="org.apache.hadoop.mapred.JobPriority"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link JobPriority} for this job.
|
|
|
|
@return the {@link JobPriority} for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProfileEnabled" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get whether the task profiling is enabled.
|
|
@return true if some tasks will be profiled]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setProfileEnabled"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="newValue" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the system should collect profiler information for some of
|
|
the tasks in this job. The information is stored in the user log
|
|
directory.
|
|
@param newValue true means it should be gathered]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProfileParams" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the profiler configuration arguments.
|
|
|
|
The default value for this property is
|
|
"-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s"
|
|
|
|
@return the parameters to pass to the task child to configure profiling]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setProfileParams"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="value" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the profiler configuration arguments. If the string contains a '%s' it
|
|
will be replaced with the name of the profiling output file when the task
|
|
runs.
|
|
|
|
This value is passed to the task child JVM on the command line.
|
|
|
|
@param value the configuration string]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="isMap" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Get the range of maps or reduces to profile.
|
|
@param isMap is the task a map?
|
|
@return the task ranges]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setProfileTaskRange"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="isMap" type="boolean"/>
|
|
<param name="newValue" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the ranges of maps or reduces to profile. setProfileEnabled(true)
|
|
must also be called.
|
|
@param newValue a set of integer ranges of the map or reduce ids]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapDebugScript"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="mDbgScript" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the debug script to run when the map tasks fail.
|
|
|
|
<p>The debug script can aid debugging of failed map tasks. The script is
|
|
given the task's stdout, stderr, syslog, jobconf files as arguments.</p>
|
|
|
|
<p>The debug command, run on the node where the map failed, is:</p>
|
|
<p><pre><blockquote>
|
|
$script $stdout $stderr $syslog $jobconf.
|
|
</blockquote></pre></p>
|
|
|
|
<p> The script file is distributed through {@link DistributedCache}
|
|
APIs. The script needs to be symlinked. </p>
|
|
|
|
<p>Here is an example of how to submit a script:</p>
|
|
<p><blockquote><pre>
|
|
job.setMapDebugScript("./myscript");
|
|
DistributedCache.createSymlink(job);
|
|
DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
|
|
</pre></blockquote></p>
|
|
|
|
@param mDbgScript the script name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapDebugScript" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the map task's debug script.
|
|
|
|
@return the debug Script for the mapred job for failed map tasks.
|
|
@see #setMapDebugScript(String)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setReduceDebugScript"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="rDbgScript" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the debug script to run when the reduce tasks fail.
|
|
|
|
<p>The debug script can aid debugging of failed reduce tasks. The script
|
|
is given the task's stdout, stderr, syslog, jobconf files as arguments.</p>
|
|
|
|
<p>The debug command, run on the node where the reduce failed, is:</p>
|
|
<p><pre><blockquote>
|
|
$script $stdout $stderr $syslog $jobconf.
|
|
</blockquote></pre></p>
|
|
|
|
<p> The script file is distributed through {@link DistributedCache}
|
|
APIs. The script file needs to be symlinked. </p>
|
|
|
|
<p>Here is an example of how to submit a script:</p>
|
|
<p><blockquote><pre>
|
|
job.setReduceDebugScript("./myscript");
|
|
DistributedCache.createSymlink(job);
|
|
DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
|
|
</pre></blockquote></p>
|
|
|
|
@param rDbgScript the script name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReduceDebugScript" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the reduce task's debug script.
|
|
|
|
@return the debug script for the mapred job for failed reduce tasks.
|
|
@see #setReduceDebugScript(String)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobEndNotificationURI" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the uri to be invoked in order to send a notification after the job
|
|
has completed (success/failure).
|
|
|
|
@return the job end notification uri, <code>null</code> if it hasn't
|
|
been set.
|
|
@see #setJobEndNotificationURI(String)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobEndNotificationURI"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="uri" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the uri to be invoked in order to send a notification after the job
|
|
has completed (success/failure).
|
|
|
|
<p>The uri can contain 2 special parameters: <tt>$jobId</tt> and
|
|
<tt>$jobStatus</tt>. Those, if present, are replaced by the job's
|
|
identifier and completion-status respectively.</p>
|
|
|
|
<p>This is typically used by application-writers to implement chaining of
|
|
Map-Reduce jobs in an <i>asynchronous manner</i>.</p>
|
|
|
|
@param uri the job end notification uri
|
|
@see JobStatus
|
|
@see <a href="{@docRoot}/org/apache/hadoop/mapred/JobClient.html#JobCompletionAndChaining">
|
|
Job Completion and Chaining</a>]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobLocalDir" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the job-specific shared directory for use as scratch space.
|
|
|
|
<p>
|
|
When a job starts, a shared directory is created at location
|
|
<code>
|
|
${mapreduce.cluster.local.dir}/taskTracker/$user/jobcache/$jobid/work/ </code>.
|
|
This directory is exposed to the users through
|
|
<code>mapreduce.job.local.dir </code>.
|
|
So, the tasks can use this space
|
|
as scratch space and share files among them. </p>
|
|
This value is also available as a system property.
|
|
|
|
@return The localized job specific shared directory]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMemoryForMapTask" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get memory required to run a map task of the job, in MB.
|
|
|
|
If a value is specified in the configuration, it is returned.
|
|
Else, it returns {@link #DISABLED_MEMORY_LIMIT}.
|
|
<p/>
|
|
For backward compatibility, if the job configuration sets the
|
|
key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different
|
|
from {@link #DISABLED_MEMORY_LIMIT}, that value will be used
|
|
after converting it from bytes to MB.
|
|
@return memory required to run a map task of the job, in MB,
|
|
or {@link #DISABLED_MEMORY_LIMIT} if unset.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMemoryForMapTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="mem" type="long"/>
|
|
</method>
|
|
<method name="getMemoryForReduceTask" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get memory required to run a reduce task of the job, in MB.
|
|
|
|
If a value is specified in the configuration, it is returned.
|
|
Else, it returns {@link #DISABLED_MEMORY_LIMIT}.
|
|
<p/>
|
|
For backward compatibility, if the job configuration sets the
|
|
key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different
|
|
from {@link #DISABLED_MEMORY_LIMIT}, that value will be used
|
|
after converting it from bytes to MB.
|
|
@return memory required to run a reduce task of the job, in MB,
|
|
or {@link #DISABLED_MEMORY_LIMIT} if unset.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMemoryForReduceTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="mem" type="long"/>
|
|
</method>
|
|
<method name="getQueueName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the name of the queue to which this job is submitted.
|
|
Defaults to 'default'.
|
|
|
|
@return name of the queue]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setQueueName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="queueName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the name of the queue to which this job should be submitted.
|
|
|
|
@param queueName Name of the queue]]>
|
|
</doc>
|
|
</method>
|
|
<method name="normalizeMemoryConfigValue" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="long"/>
|
|
<doc>
|
|
<![CDATA[Normalize the negative values in configuration
|
|
|
|
@param val
|
|
@return normalized value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxVirtualMemoryForTask" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link #getMemoryForMapTask()} and
|
|
{@link #getMemoryForReduceTask()}">
|
|
<doc>
|
|
<![CDATA[Get the memory required to run a task of this job, in bytes. See
|
|
{@link #MAPRED_TASK_MAXVMEM_PROPERTY}
|
|
<p/>
|
|
This method is deprecated. Now, different memory limits can be
|
|
set for map and reduce tasks of a job, in MB.
|
|
<p/>
|
|
For backward compatibility, if the job configuration sets the
|
|
key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different
|
|
from {@link #DISABLED_MEMORY_LIMIT}, that value is returned.
|
|
Otherwise, this method will return the larger of the values returned by
|
|
{@link #getMemoryForMapTask()} and {@link #getMemoryForReduceTask()}
|
|
after converting them into bytes.
|
|
|
|
@return Memory required to run a task of this job, in bytes,
|
|
or {@link #DISABLED_MEMORY_LIMIT}, if unset.
|
|
@see #setMaxVirtualMemoryForTask(long)
|
|
@deprecated Use {@link #getMemoryForMapTask()} and
|
|
{@link #getMemoryForReduceTask()}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxVirtualMemoryForTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link #setMemoryForMapTask(long mem)} and
|
|
Use {@link #setMemoryForReduceTask(long mem)}">
|
|
<param name="vmem" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the maximum amount of memory any task of this job can use. See
|
|
{@link #MAPRED_TASK_MAXVMEM_PROPERTY}
|
|
<p/>
|
|
mapred.task.maxvmem is split into
|
|
mapreduce.map.memory.mb
|
|
and mapreduce.reduce.memory.mb;
|
|
each of the new keys is set
|
|
as mapred.task.maxvmem / 1024
|
|
as new values are in MB
|
|
|
|
@param vmem Maximum amount of virtual memory in bytes any task of this job
|
|
can use.
|
|
@see #getMaxVirtualMemoryForTask()
|
|
@deprecated
|
|
Use {@link #setMemoryForMapTask(long mem)} and
|
|
Use {@link #setMemoryForReduceTask(long mem)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxPhysicalMemoryForTask" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="this variable is deprecated and nolonger in use.">
|
|
<doc>
|
|
<![CDATA[@deprecated this variable is deprecated and no longer in use.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxPhysicalMemoryForTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="mem" type="long"/>
|
|
</method>
|
|
<field name="MAPRED_TASK_MAXVMEM_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="Use {@link #MAPRED_JOB_MAP_MEMORY_MB_PROPERTY} and
|
|
{@link #MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY}">
|
|
<doc>
|
|
<![CDATA[@deprecated Use {@link #MAPRED_JOB_MAP_MEMORY_MB_PROPERTY} and
|
|
{@link #MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY}]]>
|
|
</doc>
|
|
</field>
|
|
<field name="UPPER_LIMIT_ON_TASK_VMEM_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="deprecated, no comment">
|
|
<doc>
|
|
<![CDATA[@deprecated]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="deprecated, no comment">
|
|
<doc>
|
|
<![CDATA[@deprecated]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_TASK_MAXPMEM_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="deprecated, no comment">
|
|
<doc>
|
|
<![CDATA[@deprecated]]>
|
|
</doc>
|
|
</field>
|
|
<field name="DISABLED_MEMORY_LIMIT" type="long"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[A value which if set for memory related configuration options,
|
|
indicates that the options are turned off.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_LOCAL_DIR_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Property name for the configuration property mapreduce.cluster.local.dir]]>
|
|
</doc>
|
|
</field>
|
|
<field name="DEFAULT_QUEUE_NAME" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Name of the queue to which jobs will be submitted, if no queue
|
|
name is mentioned.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="UNPACK_JAR_PATTERN_DEFAULT" type="java.util.regex.Pattern"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Pattern for the default unpacking behavior for job jars]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_TASK_JAVA_OPTS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="Use {@link #MAPRED_MAP_TASK_JAVA_OPTS} or
|
|
{@link #MAPRED_REDUCE_TASK_JAVA_OPTS}">
|
|
<doc>
|
|
<![CDATA[Configuration key to set the java command line options for the child
|
|
map and reduce tasks.
|
|
|
|
Java opts for the task tracker child processes.
|
|
The following symbol, if present, will be interpolated: @taskid@.
|
|
It is replaced by current TaskID. Any other occurrences of '@' will go
|
|
unchanged.
|
|
For example, to enable verbose gc logging to a file named for the taskid in
|
|
/tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
|
|
-Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
|
|
|
|
The configuration variable {@link #MAPRED_TASK_ULIMIT} can be used to
|
|
control the maximum virtual memory of the child processes.
|
|
|
|
The configuration variable {@link #MAPRED_TASK_ENV} can be used to pass
|
|
other environment variables to the child processes.
|
|
|
|
@deprecated Use {@link #MAPRED_MAP_TASK_JAVA_OPTS} or
|
|
{@link #MAPRED_REDUCE_TASK_JAVA_OPTS}]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_MAP_TASK_JAVA_OPTS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Configuration key to set the java command line options for the map tasks.
|
|
|
|
Java opts for the task tracker child map processes.
|
|
The following symbol, if present, will be interpolated: @taskid@.
|
|
It is replaced by current TaskID. Any other occurrences of '@' will go
|
|
unchanged.
|
|
For example, to enable verbose gc logging to a file named for the taskid in
|
|
/tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
|
|
-Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
|
|
|
|
The configuration variable {@link #MAPRED_MAP_TASK_ULIMIT} can be used to
|
|
control the maximum virtual memory of the map processes.
|
|
|
|
The configuration variable {@link #MAPRED_MAP_TASK_ENV} can be used to pass
|
|
other environment variables to the map processes.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_REDUCE_TASK_JAVA_OPTS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Configuration key to set the java command line options for the reduce tasks.
|
|
|
|
Java opts for the task tracker child reduce processes.
|
|
The following symbol, if present, will be interpolated: @taskid@.
|
|
It is replaced by current TaskID. Any other occurrences of '@' will go
|
|
unchanged.
|
|
For example, to enable verbose gc logging to a file named for the taskid in
|
|
/tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
|
|
-Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
|
|
|
|
The configuration variable {@link #MAPRED_REDUCE_TASK_ULIMIT} can be used
|
|
to control the maximum virtual memory of the reduce processes.
|
|
|
|
The configuration variable {@link #MAPRED_REDUCE_TASK_ENV} can be used to
|
|
pass process environment variables to the reduce processes.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="DEFAULT_MAPRED_TASK_JAVA_OPTS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="MAPRED_TASK_ULIMIT" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="Use {@link #MAPRED_MAP_TASK_ULIMIT} or
|
|
{@link #MAPRED_REDUCE_TASK_ULIMIT}">
|
|
<doc>
|
|
<![CDATA[Configuration key to set the maximum virtual memory available to the child
|
|
map and reduce tasks (in kilo-bytes).
|
|
|
|
Note: This must be greater than or equal to the -Xmx passed to the JavaVM
|
|
via {@link #MAPRED_TASK_JAVA_OPTS}, else the VM might not start.
|
|
|
|
@deprecated Use {@link #MAPRED_MAP_TASK_ULIMIT} or
|
|
{@link #MAPRED_REDUCE_TASK_ULIMIT}]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_MAP_TASK_ULIMIT" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Configuration key to set the maximum virtual memory available to the
|
|
map tasks (in kilo-bytes).
|
|
|
|
Note: This must be greater than or equal to the -Xmx passed to the JavaVM
|
|
via {@link #MAPRED_MAP_TASK_JAVA_OPTS}, else the VM might not start.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_REDUCE_TASK_ULIMIT" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Configuration key to set the maximum virtual memory available to the
|
|
reduce tasks (in kilo-bytes).
|
|
|
|
Note: This must be greater than or equal to the -Xmx passed to the JavaVM
|
|
via {@link #MAPRED_REDUCE_TASK_JAVA_OPTS}, else the VM might not start.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_TASK_ENV" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="Use {@link #MAPRED_MAP_TASK_ENV} or
|
|
{@link #MAPRED_REDUCE_TASK_ENV}">
|
|
<doc>
|
|
<![CDATA[Configuration key to set the environment of the child map/reduce tasks.
|
|
|
|
The format of the value is <code>k1=v1,k2=v2</code>. Further it can
|
|
reference existing environment variables via <code>$key</code>.
|
|
|
|
Example:
|
|
<ul>
|
|
<li> A=foo - This will set the env variable A to foo. </li>
|
|
<li> B=$X:c - This will inherit the tasktracker's X env variable. </li>
|
|
</ul>
|
|
|
|
@deprecated Use {@link #MAPRED_MAP_TASK_ENV} or
|
|
{@link #MAPRED_REDUCE_TASK_ENV}]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_MAP_TASK_ENV" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Configuration key to set the environment of the child
|
|
map tasks.
|
|
|
|
The format of the value is <code>k1=v1,k2=v2</code>. Further it can
|
|
reference existing environment variables via <code>$key</code>.
|
|
|
|
Example:
|
|
<ul>
|
|
<li> A=foo - This will set the env variable A to foo. </li>
|
|
<li> B=$X:c - This will inherit the tasktracker's X env variable. </li>
|
|
</ul>]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_REDUCE_TASK_ENV" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Configuration key to set the environment of the child
|
|
reduce tasks.
|
|
|
|
The format of the value is <code>k1=v1,k2=v2</code>. Further it can
|
|
reference existing environment variables via <code>$key</code>.
|
|
|
|
Example:
|
|
<ul>
|
|
<li> A=foo - This will set the env variable A to foo. </li>
|
|
<li> B=$X:c - This will inherit the tasktracker's X env variable. </li>
|
|
</ul>]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_MAP_TASK_LOG_LEVEL" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Configuration key to set the logging {@link Level} for the map task.
|
|
|
|
The allowed logging levels are:
|
|
OFF, FATAL, ERROR, WARN, INFO, DEBUG, TRACE and ALL.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_REDUCE_TASK_LOG_LEVEL" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Configuration key to set the logging {@link Level} for the reduce task.
|
|
|
|
The allowed logging levels are:
|
|
OFF, FATAL, ERROR, WARN, INFO, DEBUG, TRACE and ALL.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="DEFAULT_LOG_LEVEL" type="org.apache.log4j.Level"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Default logging level for map/reduce tasks.]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A map/reduce job configuration.
|
|
|
|
<p><code>JobConf</code> is the primary interface for a user to describe a
|
|
map-reduce job to the Hadoop framework for execution. The framework tries to
|
|
faithfully execute the job as-is described by <code>JobConf</code>, however:
|
|
<ol>
|
|
<li>
|
|
Some configuration parameters might have been marked as
|
|
<a href="{@docRoot}/org/apache/hadoop/conf/Configuration.html#FinalParams">
|
|
final</a> by administrators and hence cannot be altered.
|
|
</li>
|
|
<li>
|
|
While some job parameters are straight-forward to set
|
|
(e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly
|
|
with the rest of the framework and/or job-configuration and are relatively more
|
|
complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}).
|
|
</li>
|
|
</ol></p>
|
|
|
|
<p><code>JobConf</code> typically specifies the {@link Mapper}, combiner
|
|
(if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and
|
|
{@link OutputFormat} implementations to be used etc.</p>
|
|
|
|
<p>Optionally <code>JobConf</code> is used to specify other advanced facets
|
|
of the job such as <code>Comparator</code>s to be used, files to be put in
|
|
the {@link DistributedCache}, whether or not intermediate and/or job outputs
|
|
are to be compressed (and how), debugability via user-provided scripts
|
|
( {@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}),
|
|
for doing post-processing on task logs, task's stdout, stderr, syslog,
|
|
etc.</p>
|
|
|
|
<p>Here is an example on how to configure a job via <code>JobConf</code>:</p>
|
|
<p><blockquote><pre>
|
|
// Create a new JobConf
|
|
JobConf job = new JobConf(new Configuration(), MyJob.class);
|
|
|
|
// Specify various job-specific parameters
|
|
job.setJobName("myjob");
|
|
|
|
FileInputFormat.setInputPaths(job, new Path("in"));
|
|
FileOutputFormat.setOutputPath(job, new Path("out"));
|
|
|
|
job.setMapperClass(MyJob.MyMapper.class);
|
|
job.setCombinerClass(MyJob.MyReducer.class);
|
|
job.setReducerClass(MyJob.MyReducer.class);
|
|
|
|
job.setInputFormat(SequenceFileInputFormat.class);
|
|
job.setOutputFormat(SequenceFileOutputFormat.class);
|
|
</pre></blockquote></p>
|
|
|
|
@see JobClient
|
|
@see ClusterStatus
|
|
@see Tool
|
|
@see DistributedCache]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobConf -->
|
|
<!-- start interface org.apache.hadoop.mapred.JobConfigurable -->
|
|
<interface name="JobConfigurable" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Initializes a new instance from a {@link JobConf}.
|
|
|
|
@param job the configuration]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Something that may be configured.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.JobConfigurable -->
|
|
<!-- start interface org.apache.hadoop.mapred.JobContext -->
|
|
<interface name="JobContext" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<method name="getJobConf" return="org.apache.hadoop.mapred.JobConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the job Configuration
|
|
|
|
@return JobConf]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgressible" return="org.apache.hadoop.util.Progressable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the progress mechanism for reporting progress.
|
|
|
|
@return progress mechanism]]>
|
|
</doc>
|
|
</method>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.JobContext -->
|
|
<!-- start class org.apache.hadoop.mapred.JobID -->
|
|
<class name="JobID" extends="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobID" type="java.lang.String, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a JobID object
|
|
@param jtIdentifier jobTracker identifier
|
|
@param id job number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobID"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="downgrade" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="old" type="org.apache.hadoop.mapreduce.JobID"/>
|
|
<doc>
|
|
<![CDATA[Downgrade a new JobID to an old one
|
|
@param old a new or old JobID
|
|
@return either old or a new JobID built to match old]]>
|
|
</doc>
|
|
</method>
|
|
<method name="read" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="forName" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="str" type="java.lang.String"/>
|
|
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
|
|
<doc>
|
|
<![CDATA[Construct a JobId object from given string
|
|
@return constructed JobId object or null if the given String is null
|
|
@throws IllegalArgumentException if the given string is malformed]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobIDsPattern" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jtIdentifier" type="java.lang.String"/>
|
|
<param name="jobId" type="java.lang.Integer"/>
|
|
<doc>
|
|
<![CDATA[Returns a regex pattern which matches job IDs. Arguments can
|
|
be given null, in which case that part of the regex will be generic.
|
|
For example to obtain a regex matching <i>any job</i>
|
|
run on the jobtracker started at <i>200707121733</i>, we would use :
|
|
<pre>
|
|
JobID.getJobIDsPattern("200707121733", null);
|
|
</pre>
|
|
which will return :
|
|
<pre> "job_200707121733_[0-9]*" </pre>
|
|
@param jtIdentifier jobTracker identifier, or null
|
|
@param jobId job number, or null
|
|
@return a regex pattern matching JobIDs]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[JobID represents the immutable and unique identifier for
|
|
the job. JobID consists of two parts. First part
|
|
represents the jobtracker identifier, so that jobID to jobtracker map
|
|
is defined. For cluster setup this string is the jobtracker
|
|
start time, for local setting, it is "local".
|
|
Second part of the JobID is the job number. <br>
|
|
An example JobID is :
|
|
<code>job_200707121733_0003</code> , which represents the third job
|
|
running at the jobtracker started at <code>200707121733</code>.
|
|
<p>
|
|
Applications should never construct or parse JobID strings, but rather
|
|
use appropriate constructors or {@link #forName(String)} method.
|
|
|
|
@see TaskID
|
|
@see TaskAttemptID]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobID -->
|
|
<!-- start class org.apache.hadoop.mapred.JobPriority -->
|
|
<class name="JobPriority" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapred.JobPriority[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapred.JobPriority"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Used to describe the priority of the running job.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobPriority -->
|
|
<!-- start class org.apache.hadoop.mapred.JobQueueInfo -->
|
|
<class name="JobQueueInfo" extends="org.apache.hadoop.mapreduce.QueueInfo"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobQueueInfo"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Default constructor for Job Queue Info.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobQueueInfo" type="java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a new JobQueueInfo object using the queue name and the
|
|
scheduling information passed.
|
|
|
|
@param queueName Name of the job queue
|
|
@param schedulingInfo Scheduling Information associated with the job
|
|
queue]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="setQueueName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="queueName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the queue name of the JobQueueInfo
|
|
|
|
@param queueName Name of the job queue.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSchedulingInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="schedulingInfo" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the scheduling information associated with a particular job queue
|
|
|
|
@param schedulingInfo]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setQueueState"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="state" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the state of the queue
|
|
@param state state of the queue.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setChildren"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="children" type="java.util.List"/>
|
|
</method>
|
|
<method name="getChildren" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setProperties"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="props" type="java.util.Properties"/>
|
|
</method>
|
|
<method name="setJobStatuses"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="stats" type="org.apache.hadoop.mapreduce.JobStatus[]"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Class that contains the information regarding the Job Queues which are
|
|
maintained by the Hadoop Map/Reduce framework.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobQueueInfo -->
|
|
<!-- start class org.apache.hadoop.mapred.JobStatus -->
|
|
<class name="JobStatus" extends="org.apache.hadoop.mapreduce.JobStatus"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobStatus"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a job status object for a given jobid.
|
|
@param jobid The jobid of the job
|
|
@param mapProgress The progress made on the maps
|
|
@param reduceProgress The progress made on the reduces
|
|
@param cleanupProgress The progress made on cleanup
|
|
@param runState The current state of the job
|
|
@param user userid of the person who submitted the job.
|
|
@param jobName user-specified job name.
|
|
@param jobFile job configuration file.
|
|
@param trackingUrl link to the web-ui for details of the job.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, int, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a job status object for a given jobid.
|
|
@param jobid The jobid of the job
|
|
@param mapProgress The progress made on the maps
|
|
@param reduceProgress The progress made on the reduces
|
|
@param runState The current state of the job
|
|
@param user userid of the person who submitted the job.
|
|
@param jobName user-specified job name.
|
|
@param jobFile job configuration file.
|
|
@param trackingUrl link to the web-ui for details of the job.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a job status object for a given jobid.
|
|
@param jobid The jobid of the job
|
|
@param mapProgress The progress made on the maps
|
|
@param reduceProgress The progress made on the reduces
|
|
@param cleanupProgress The progress made on cleanup
|
|
@param runState The current state of the job
|
|
@param jp Priority of the job.
|
|
@param user userid of the person who submitted the job.
|
|
@param jobName user-specified job name.
|
|
@param jobFile job configuration file.
|
|
@param trackingUrl link to the web-ui for details of the job.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a job status object for a given jobid.
|
|
@param jobid The jobid of the job
|
|
@param setupProgress The progress made on the setup
|
|
@param mapProgress The progress made on the maps
|
|
@param reduceProgress The progress made on the reduces
|
|
@param cleanupProgress The progress made on the cleanup
|
|
@param runState The current state of the job
|
|
@param jp Priority of the job.
|
|
@param user userid of the person who submitted the job.
|
|
@param jobName user-specified job name.
|
|
@param jobFile job configuration file.
|
|
@param trackingUrl link to the web-ui for details of the job.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getJobRunState" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="state" type="int"/>
|
|
<doc>
|
|
<![CDATA[Helper method to get human-readable state of the job.
|
|
@param state job state
|
|
@return human-readable state of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="downgrade" return="org.apache.hadoop.mapred.JobStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="stat" type="org.apache.hadoop.mapreduce.JobStatus"/>
|
|
</method>
|
|
<method name="getJobId" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="use getJobID instead">
|
|
<doc>
|
|
<![CDATA[@deprecated use getJobID instead]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobID" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return The jobid of the Job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobPriority" return="org.apache.hadoop.mapred.JobPriority"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the priority of the job
|
|
@return job priority]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapProgress"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="p" type="float"/>
|
|
<doc>
|
|
<![CDATA[Sets the map progress of this job
|
|
@param p The value of map progress to set to]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setCleanupProgress"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="p" type="float"/>
|
|
<doc>
|
|
<![CDATA[Sets the cleanup progress of this job
|
|
@param p The value of cleanup progress to set to]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSetupProgress"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="p" type="float"/>
|
|
<doc>
|
|
<![CDATA[Sets the setup progress of this job
|
|
@param p The value of setup progress to set to]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setReduceProgress"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="p" type="float"/>
|
|
<doc>
|
|
<![CDATA[Sets the reduce progress of this Job
|
|
@param p The value of reduce progress to set to]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setFinishTime"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="finishTime" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the finish time of the job
|
|
@param finishTime The finishTime of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setHistoryFile"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="historyFile" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the job history file url for a completed job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTrackingUrl"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="trackingUrl" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the link to the web-ui for details of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setRetired"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Set the job retire flag to true.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setRunState"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="state" type="int"/>
|
|
<doc>
|
|
<![CDATA[Change the current run state of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRunState" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return running state of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setStartTime"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="startTime" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the start time of the job
|
|
@param startTime The startTime of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setUsername"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="userName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[@param userName The username of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSchedulingInfo"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="schedulingInfo" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Used to set the scheduling information associated to a particular Job.
|
|
|
|
@param schedulingInfo Scheduling information of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobACLs"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="acls" type="java.util.Map"/>
|
|
</method>
|
|
<method name="setJobPriority"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jp" type="org.apache.hadoop.mapred.JobPriority"/>
|
|
<doc>
|
|
<![CDATA[Set the priority of the job, defaulting to NORMAL.
|
|
@param jp new job priority]]>
|
|
</doc>
|
|
</method>
|
|
<method name="mapProgress" return="float"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Percentage of progress in maps]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanupProgress" return="float"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Percentage of progress in cleanup]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupProgress" return="float"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Percentage of progress in setup]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reduceProgress" return="float"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Percentage of progress in reduce]]>
|
|
</doc>
|
|
</method>
|
|
<field name="RUNNING" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="SUCCEEDED" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FAILED" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="PREP" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="KILLED" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Describes the current status of a job. This is
|
|
not intended to be a comprehensive piece of data.
|
|
For that, look at JobProfile.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobStatus -->
|
|
<!-- start class org.apache.hadoop.mapred.KeyValueLineRecordReader -->
|
|
<class name="KeyValueLineRecordReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<constructor name="KeyValueLineRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="getKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="createKey" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="createValue" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="findSeparator" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="utf" type="byte[]"/>
|
|
<param name="start" type="int"/>
|
|
<param name="length" type="int"/>
|
|
<param name="sep" type="byte"/>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.Text"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read key/value pair in a line.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class treats a line in the input as a key/value pair separated by a
|
|
separator character. The separator can be specified in config file
|
|
under the attribute name mapreduce.input.keyvaluelinerecordreader.key.value.separator. The default
|
|
separator is the tab character ('\t').]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.KeyValueLineRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.KeyValueTextInputFormat -->
|
|
<class name="KeyValueTextInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<constructor name="KeyValueTextInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="isSplitable" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="file" type="org.apache.hadoop.fs.Path"/>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines.
|
|
Either linefeed or carriage-return is used to signal end of line. Each line
|
|
is divided into key and value parts by a separator byte. If no such byte
|
|
exists, the key will be the entire line and value will be empty.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.KeyValueTextInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.LineRecordReader.LineReader -->
|
|
<class name="LineRecordReader.LineReader" extends="org.apache.hadoop.util.LineReader"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="LineRecordReader.LineReader" type="java.io.InputStream, org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[A class that provides a line reader from an input stream.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.LineRecordReader.LineReader -->
|
|
<!-- start class org.apache.hadoop.mapred.MapFileOutputFormat -->
|
|
<class name="MapFileOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MapFileOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getReaders" return="org.apache.hadoop.io.MapFile.Reader[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="dir" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Open the output generated by this format.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getEntry" return="org.apache.hadoop.io.Writable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="readers" type="org.apache.hadoop.io.MapFile.Reader[]"/>
|
|
<param name="partitioner" type="org.apache.hadoop.mapred.Partitioner"/>
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get an entry from output generated by this class.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link OutputFormat} that writes {@link MapFile}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.MapFileOutputFormat -->
|
|
<!-- start interface org.apache.hadoop.mapred.Mapper -->
|
|
<interface name="Mapper" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<implements name="org.apache.hadoop.io.Closeable"/>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K1"/>
|
|
<param name="value" type="V1"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Maps a single input key/value pair into an intermediate key/value pair.
|
|
|
|
<p>Output pairs need not be of the same types as input pairs. A given
|
|
input pair may map to zero or many output pairs. Output pairs are
|
|
collected with calls to
|
|
{@link OutputCollector#collect(Object,Object)}.</p>
|
|
|
|
<p>Applications can use the {@link Reporter} provided to report progress
|
|
or just indicate that they are alive. In scenarios where the application
|
|
takes a significant amount of time to process individual key/value
|
|
pairs, this is crucial since the framework might assume that the task has
|
|
timed-out and kill that task. The other way of avoiding this is to set
|
|
<a href="{@docRoot}/../mapred-default.html#mapreduce.task.timeout">
|
|
mapreduce.task.timeout</a> to a high-enough value (or even zero for no
|
|
time-outs).</p>
|
|
|
|
@param key the input key.
|
|
@param value the input value.
|
|
@param output collects mapped keys and values.
|
|
@param reporter facility to report progress.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Maps input key/value pairs to a set of intermediate key/value pairs.
|
|
|
|
<p>Maps are the individual tasks which transform input records into
|
|
intermediate records. The transformed intermediate records need not be of
|
|
the same type as the input records. A given input pair may map to zero or
|
|
many output pairs.</p>
|
|
|
|
<p>The Hadoop Map-Reduce framework spawns one map task for each
|
|
{@link InputSplit} generated by the {@link InputFormat} for the job.
|
|
<code>Mapper</code> implementations can access the {@link JobConf} for the
|
|
job via the {@link JobConfigurable#configure(JobConf)} and initialize
|
|
themselves. Similarly they can use the {@link Closeable#close()} method for
|
|
de-initialization.</p>
|
|
|
|
<p>The framework then calls
|
|
{@link #map(Object, Object, OutputCollector, Reporter)}
|
|
for each key/value pair in the <code>InputSplit</code> for that task.</p>
|
|
|
|
<p>All intermediate values associated with a given output key are
|
|
subsequently grouped by the framework, and passed to a {@link Reducer} to
|
|
determine the final output. Users can control the grouping by specifying
|
|
a <code>Comparator</code> via
|
|
{@link JobConf#setOutputKeyComparatorClass(Class)}.</p>
|
|
|
|
<p>The grouped <code>Mapper</code> outputs are partitioned per
|
|
<code>Reducer</code>. Users can control which keys (and hence records) go to
|
|
which <code>Reducer</code> by implementing a custom {@link Partitioner}.</p>
|
|
|
|
<p>Users can optionally specify a <code>combiner</code>, via
|
|
{@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the
|
|
intermediate outputs, which helps to cut down the amount of data transferred
|
|
from the <code>Mapper</code> to the <code>Reducer</code>.</p>
|
|
|
|
<p>The intermediate, grouped outputs are always stored in
|
|
{@link SequenceFile}s. Applications can specify if and how the intermediate
|
|
outputs are to be compressed and which {@link CompressionCodec}s are to be
|
|
used via the <code>JobConf</code>.</p>
|
|
|
|
<p>If the job has
|
|
<a href="{@docRoot}/org/apache/hadoop/mapred/JobConf.html#ReducerNone">zero
|
|
reduces</a> then the output of the <code>Mapper</code> is directly written
|
|
to the {@link FileSystem} without grouping by keys.</p>
|
|
|
|
<p>Example:</p>
|
|
<p><blockquote><pre>
|
|
public class MyMapper<K extends WritableComparable, V extends Writable>
|
|
extends MapReduceBase implements Mapper<K, V, K, V> {
|
|
|
|
static enum MyCounters { NUM_RECORDS }
|
|
|
|
private String mapTaskId;
|
|
private String inputFile;
|
|
private int noRecords = 0;
|
|
|
|
public void configure(JobConf job) {
|
|
mapTaskId = job.get(JobContext.TASK_ATTEMPT_ID);
|
|
inputFile = job.get(JobContext.MAP_INPUT_FILE);
|
|
}
|
|
|
|
public void map(K key, V val,
|
|
OutputCollector<K, V> output, Reporter reporter)
|
|
throws IOException {
|
|
// Process the <key, value> pair (assume this takes a while)
|
|
// ...
|
|
// ...
|
|
|
|
// Let the framework know that we are alive, and kicking!
|
|
// reporter.progress();
|
|
|
|
// Process some more
|
|
// ...
|
|
// ...
|
|
|
|
// Increment the no. of <key, value> pairs processed
|
|
++noRecords;
|
|
|
|
// Increment counters
|
|
reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
|
|
|
|
// Every 100 records update application-level status
|
|
if ((noRecords%100) == 0) {
|
|
reporter.setStatus(mapTaskId + " processed " + noRecords +
|
|
" from input-file: " + inputFile);
|
|
}
|
|
|
|
// Output the result
|
|
output.collect(key, val);
|
|
}
|
|
}
|
|
</pre></blockquote></p>
|
|
|
|
<p>Applications may write a custom {@link MapRunnable} to exert greater
|
|
control on map processing e.g. multi-threaded <code>Mapper</code>s etc.</p>
|
|
|
|
@see JobConf
|
|
@see InputFormat
|
|
@see Partitioner
|
|
@see Reducer
|
|
@see MapReduceBase
|
|
@see MapRunnable
|
|
@see SequenceFile]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.Mapper -->
|
|
<!-- start class org.apache.hadoop.mapred.MapReduceBase -->
|
|
<class name="MapReduceBase" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Closeable"/>
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<constructor name="MapReduceBase"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Default implementation that does nothing.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Default implementation that does nothing.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Base class for {@link Mapper} and {@link Reducer} implementations.
|
|
|
|
<p>Provides default no-op implementations for a few methods, most non-trivial
|
|
applications need to override some of them.</p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.MapReduceBase -->
|
|
<!-- start interface org.apache.hadoop.mapred.MapRunnable -->
|
|
<interface name="MapRunnable" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="input" type="org.apache.hadoop.mapred.RecordReader"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Start mapping input <tt><key, value></tt> pairs.
|
|
|
|
<p>Mapping of input records to output records is complete when this method
|
|
returns.</p>
|
|
|
|
@param input the {@link RecordReader} to read the input records.
|
|
@param output the {@link OutputCollector} to collect the output records.
|
|
@param reporter {@link Reporter} to report progress, status-updates etc.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Expert: Generic interface for {@link Mapper}s.
|
|
|
|
<p>Custom implementations of <code>MapRunnable</code> can exert greater
|
|
control on map processing e.g. multi-threaded, asynchronous mappers etc.</p>
|
|
|
|
@see Mapper]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.MapRunnable -->
|
|
<!-- start class org.apache.hadoop.mapred.MapRunner -->
|
|
<class name="MapRunner" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.MapRunnable"/>
|
|
<constructor name="MapRunner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="input" type="org.apache.hadoop.mapred.RecordReader"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getMapper" return="org.apache.hadoop.mapred.Mapper"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Default {@link MapRunnable} implementation.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.MapRunner -->
|
|
<!-- start class org.apache.hadoop.mapred.MultiFileInputFormat -->
|
|
<class name="MultiFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultiFileInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="numSplits" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An abstract {@link InputFormat} that returns {@link MultiFileSplit}'s
|
|
in {@link #getSplits(JobConf, int)} method. Splits are constructed from
|
|
the files under the input paths. Each split returned contains <i>nearly</i>
|
|
equal content length. <br>
|
|
Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)}
|
|
to construct <code>RecordReader</code>'s for <code>MultiFileSplit</code>'s.
|
|
@see MultiFileSplit]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.MultiFileInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.MultiFileSplit -->
|
|
<class name="MultiFileSplit" extends="org.apache.hadoop.mapred.lib.CombineFileSplit"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultiFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A sub-collection of input files. Unlike {@link FileSplit}, MultiFileSplit
|
|
class does not represent a split of a file, but a split of input files
|
|
into smaller sets. The atomic unit of split is a file. <br>
|
|
MultiFileSplit can be used to implement {@link RecordReader}'s, with
|
|
reading one record per file.
|
|
@see FileSplit
|
|
@see MultiFileInputFormat]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.MultiFileSplit -->
|
|
<!-- start interface org.apache.hadoop.mapred.OutputCollector -->
|
|
<interface name="OutputCollector" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="collect"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Adds a key/value pair to the output.
|
|
|
|
@param key the key to collect.
|
|
@param value the value to collect.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Collects the <code>&lt;key, value&gt;</code> pairs output by {@link Mapper}s
|
|
and {@link Reducer}s.
|
|
|
|
<p><code>OutputCollector</code> is the generalization of the facility
|
|
provided by the Map-Reduce framework to collect data output by either the
|
|
<code>Mapper</code> or the <code>Reducer</code> i.e. intermediate outputs
|
|
or the output of the job.</p>]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.OutputCollector -->
|
|
<!-- start class org.apache.hadoop.mapred.OutputCommitter -->
|
|
<class name="OutputCommitter" extends="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="OutputCommitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setupJob"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For the framework to setup the job output during initialization
|
|
|
|
@param jobContext Context of the job whose output is being written.
|
|
@throws IOException if temporary output could not be created]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanupJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link #commitJob(JobContext)} or
|
|
{@link #abortJob(JobContext, int)} instead.">
|
|
<param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For cleaning up the job's output after job completion
|
|
|
|
@param jobContext Context of the job whose output is being written.
|
|
@throws IOException
|
|
@deprecated Use {@link #commitJob(JobContext)} or
|
|
{@link #abortJob(JobContext, int)} instead.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="commitJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For committing job's output after successful job completion. Note that this
|
|
is invoked for jobs with final runstate as SUCCESSFUL.
|
|
|
|
@param jobContext Context of the job whose output is being written.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="abortJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/>
|
|
<param name="status" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For aborting an unsuccessful job's output. Note that this is invoked for
|
|
jobs with final runstate as {@link JobStatus#FAILED} or
|
|
{@link JobStatus#KILLED}
|
|
|
|
@param jobContext Context of the job whose output is being written.
|
|
@param status final runstate of the job
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupTask"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Sets up output for the task.
|
|
|
|
@param taskContext Context of the task whose output is being written.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="needsTaskCommit" return="boolean"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Check whether task needs a commit
|
|
|
|
@param taskContext
|
|
@return true/false
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="commitTask"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[To promote the task's temporary output to final output location
|
|
|
|
The task's output is moved to the job's output directory.
|
|
|
|
@param taskContext Context of the task whose output is being written.
|
|
@throws IOException if commit is not successful]]>
|
|
</doc>
|
|
</method>
|
|
<method name="abortTask"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Discard the task output
|
|
|
|
@param taskContext
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method implements the new interface by calling the old method. Note
|
|
that the input types are different between the new and old apis and this
|
|
is a bridge between the two.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanupJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="Use {@link #commitJob(org.apache.hadoop.mapreduce.JobContext)}
|
|
or {@link #abortJob(org.apache.hadoop.mapreduce.JobContext, org.apache.hadoop.mapreduce.JobStatus.State)}
|
|
instead.">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method implements the new interface by calling the old method. Note
|
|
that the input types are different between the new and old apis and this
|
|
is a bridge between the two.
|
|
@deprecated Use {@link #commitJob(org.apache.hadoop.mapreduce.JobContext)}
|
|
or {@link #abortJob(org.apache.hadoop.mapreduce.JobContext, org.apache.hadoop.mapreduce.JobStatus.State)}
|
|
instead.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="commitJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method implements the new interface by calling the old method. Note
|
|
that the input types are different between the new and old apis and this
|
|
is a bridge between the two.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="abortJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<param name="runState" type="org.apache.hadoop.mapreduce.JobStatus.State"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method implements the new interface by calling the old method. Note
|
|
that the input types are different between the new and old apis and this
|
|
is a bridge between the two.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method implements the new interface by calling the old method. Note
|
|
that the input types are different between the new and old apis and this
|
|
is a bridge between the two.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="needsTaskCommit" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method implements the new interface by calling the old method. Note
|
|
that the input types are different between the new and old apis and this
|
|
is a bridge between the two.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="commitTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method implements the new interface by calling the old method. Note
|
|
that the input types are different between the new and old apis and this
|
|
is a bridge between the two.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="abortTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method implements the new interface by calling the old method. Note
|
|
that the input types are different between the new and old apis and this
|
|
is a bridge between the two.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>OutputCommitter</code> describes the commit of task output for a
|
|
Map-Reduce job.
|
|
|
|
<p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of
|
|
the job to:</p>
|
|
<ol>
|
|
<li>
|
|
Setup the job during initialization. For example, create the temporary
|
|
output directory for the job during the initialization of the job.
|
|
</li>
|
|
<li>
|
|
Cleanup the job after the job completion. For example, remove the
|
|
temporary output directory after the job completion.
|
|
</li>
|
|
<li>
|
|
Setup the task temporary output.
|
|
</li>
|
|
<li>
|
|
Check whether a task needs a commit. This is to avoid the commit
|
|
procedure if a task does not need commit.
|
|
</li>
|
|
<li>
|
|
Commit of the task output.
|
|
</li>
|
|
<li>
|
|
Discard the task output.
|
|
</li>
|
|
</ol>
|
|
|
|
@see FileOutputCommitter
|
|
@see JobContext
|
|
@see TaskAttemptContext]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.OutputCommitter -->
|
|
<!-- start interface org.apache.hadoop.mapred.OutputFormat -->
|
|
<interface name="OutputFormat" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link RecordWriter} for the given job.
|
|
|
|
@param ignored
|
|
@param job configuration for the job whose output is being written.
|
|
@param name the unique name for this part of the output.
|
|
@param progress mechanism for reporting progress while writing to file.
|
|
@return a {@link RecordWriter} to write the output for the job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Check for validity of the output-specification for the job.
|
|
|
|
<p>This is to validate the output specification for the job when it is
|
|
submitted. Typically checks that it does not already exist,
|
|
throwing an exception when it already exists, so that output is not
|
|
overwritten.</p>
|
|
|
|
@param ignored
|
|
@param job job configuration.
|
|
@throws IOException when output should not be attempted]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>OutputFormat</code> describes the output-specification for a
|
|
Map-Reduce job.
|
|
|
|
<p>The Map-Reduce framework relies on the <code>OutputFormat</code> of the
|
|
job to:</p>
|
|
<ol>
|
|
<li>
|
|
Validate the output-specification of the job. For example, check that the
|
|
output directory doesn't already exist.</li>
|
|
<li>
|
|
Provide the {@link RecordWriter} implementation to be used to write out
|
|
the output files of the job. Output files are stored in a
|
|
{@link FileSystem}.
|
|
</li>
|
|
</ol>
|
|
|
|
@see RecordWriter
|
|
@see JobConf]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.OutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.OutputLogFilter -->
|
|
<class name="OutputLogFilter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputLogFilter}
|
|
instead.">
|
|
<implements name="org.apache.hadoop.fs.PathFilter"/>
|
|
<constructor name="OutputLogFilter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class filters log files from the given directory.
|
|
It doesn't accept paths having _logs.
|
|
This can be used to list paths of output directory as follows:
|
|
Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir,
|
|
new OutputLogFilter()));
|
|
@deprecated Use
|
|
{@link org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputLogFilter}
|
|
instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.OutputLogFilter -->
|
|
<!-- start interface org.apache.hadoop.mapred.Partitioner -->
|
|
<interface name="Partitioner" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<method name="getPartition" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K2"/>
|
|
<param name="value" type="V2"/>
|
|
<param name="numPartitions" type="int"/>
|
|
<doc>
|
|
<![CDATA[Get the partition number for a given key (hence record) given the total
|
|
number of partitions i.e. number of reduce-tasks for the job.
|
|
|
|
<p>Typically a hash function on all or a subset of the key.</p>
|
|
|
|
@param key the key to be partitioned.
|
|
@param value the entry value.
|
|
@param numPartitions the total number of partitions.
|
|
@return the partition number for the <code>key</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Partitions the key space.
|
|
|
|
<p><code>Partitioner</code> controls the partitioning of the keys of the
|
|
intermediate map-outputs. The key (or a subset of the key) is used to derive
|
|
the partition, typically by a hash function. The total number of partitions
|
|
is the same as the number of reduce tasks for the job. Hence this controls
|
|
which of the <code>m</code> reduce tasks the intermediate key (and hence the
|
|
record) is sent for reduction.</p>
|
|
|
|
@see Reducer]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.Partitioner -->
|
|
<!-- start interface org.apache.hadoop.mapred.RecordReader -->
|
|
<interface name="RecordReader" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Reads the next key/value pair from the input for processing.
|
|
|
|
@param key the key to read data into
|
|
@param value the value to read data into
|
|
@return true iff a key/value was read, false if at EOF]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createKey" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create an object of the appropriate type to be used as a key.
|
|
|
|
@return a new key object.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValue" return="V"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create an object of the appropriate type to be used as a value.
|
|
|
|
@return a new value object.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Returns the current position in the input.
|
|
|
|
@return the current position in the input.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close this {@link RecordReader} to future operations.
|
|
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[How much of the input has the {@link RecordReader} consumed i.e.
|
|
has been processed by?
|
|
|
|
@return progress from <code>0.0</code> to <code>1.0</code>.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>RecordReader</code> reads &lt;key, value&gt; pairs from an
|
|
{@link InputSplit}.
|
|
|
|
<p><code>RecordReader</code>, typically, converts the byte-oriented view of
|
|
the input, provided by the <code>InputSplit</code>, and presents a
|
|
record-oriented view for the {@link Mapper} &amp; {@link Reducer} tasks for
|
|
processing. It thus assumes the responsibility of processing record
|
|
boundaries and presenting the tasks with keys and values.</p>
|
|
|
|
@see InputSplit
|
|
@see InputFormat]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.RecordReader -->
|
|
<!-- start interface org.apache.hadoop.mapred.RecordWriter -->
|
|
<interface name="RecordWriter" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Writes a key/value pair.
|
|
|
|
@param key the key to write.
|
|
@param value the value to write.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close this <code>RecordWriter</code> to future operations.
|
|
|
|
@param reporter facility to report progress.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>RecordWriter</code> writes the output &lt;key, value&gt; pairs
|
|
to an output file.
|
|
|
|
<p><code>RecordWriter</code> implementations write the job outputs to the
|
|
{@link FileSystem}.</p>
|
|
|
|
@see OutputFormat]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.RecordWriter -->
|
|
<!-- start interface org.apache.hadoop.mapred.Reducer -->
|
|
<interface name="Reducer" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<implements name="org.apache.hadoop.io.Closeable"/>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K2"/>
|
|
<param name="values" type="java.util.Iterator"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[<i>Reduces</i> values for a given key.
|
|
|
|
<p>The framework calls this method for each
|
|
<code>&lt;key, (list of values)&gt;</code> pair in the grouped inputs.
|
|
Output values must be of the same type as input values. Input keys must
|
|
not be altered. The framework will <b>reuse</b> the key and value objects
|
|
that are passed into the reduce, therefore the application should clone
|
|
the objects they want to keep a copy of. In many cases, all values are
|
|
combined into zero or one value.
|
|
</p>
|
|
|
|
<p>Output pairs are collected with calls to
|
|
{@link OutputCollector#collect(Object,Object)}.</p>
|
|
|
|
<p>Applications can use the {@link Reporter} provided to report progress
|
|
or just indicate that they are alive. In scenarios where the application
|
|
takes a significant amount of time to process individual key/value
|
|
pairs, this is crucial since the framework might assume that the task has
|
|
timed-out and kill that task. The other way of avoiding this is to set
|
|
<a href="{@docRoot}/../mapred-default.html#mapreduce.task.timeout">
|
|
mapreduce.task.timeout</a> to a high-enough value (or even zero for no
|
|
time-outs).</p>
|
|
|
|
@param key the key.
|
|
@param values the list of values to reduce.
|
|
@param output to collect keys and combined values.
|
|
@param reporter facility to report progress.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Reduces a set of intermediate values which share a key to a smaller set of
|
|
values.
|
|
|
|
<p>The number of <code>Reducer</code>s for the job is set by the user via
|
|
{@link JobConf#setNumReduceTasks(int)}. <code>Reducer</code> implementations
|
|
can access the {@link JobConf} for the job via the
|
|
{@link JobConfigurable#configure(JobConf)} method and initialize themselves.
|
|
Similarly they can use the {@link Closeable#close()} method for
|
|
de-initialization.</p>
|
|
|
|
<p><code>Reducer</code> has 3 primary phases:</p>
|
|
<ol>
|
|
<li>
|
|
|
|
<h4 id="Shuffle">Shuffle</h4>
|
|
|
|
<p><code>Reducer</code> is input the grouped output of a {@link Mapper}.
|
|
In this phase the framework, for each <code>Reducer</code>, fetches the
|
|
relevant partition of the output of all the <code>Mapper</code>s, via HTTP.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<h4 id="Sort">Sort</h4>
|
|
|
|
<p>The framework groups <code>Reducer</code> inputs by <code>key</code>s
|
|
(since different <code>Mapper</code>s may have output the same key) in this
|
|
stage.</p>
|
|
|
|
<p>The shuffle and sort phases occur simultaneously i.e. while outputs are
|
|
being fetched they are merged.</p>
|
|
|
|
<h5 id="SecondarySort">SecondarySort</h5>
|
|
|
|
<p>If equivalence rules for keys while grouping the intermediates are
|
|
different from those for grouping keys before reduction, then one may
|
|
specify a <code>Comparator</code> via
|
|
{@link JobConf#setOutputValueGroupingComparator(Class)}. Since
|
|
{@link JobConf#setOutputKeyComparatorClass(Class)} can be used to
|
|
control how intermediate keys are grouped, these can be used in conjunction
|
|
to simulate <i>secondary sort on values</i>.</p>
|
|
|
|
|
|
For example, say that you want to find duplicate web pages and tag them
|
|
all with the url of the "best" known example. You would set up the job
|
|
like:
|
|
<ul>
|
|
<li>Map Input Key: url</li>
|
|
<li>Map Input Value: document</li>
|
|
<li>Map Output Key: document checksum, url pagerank</li>
|
|
<li>Map Output Value: url</li>
|
|
<li>Partitioner: by checksum</li>
|
|
<li>OutputKeyComparator: by checksum and then decreasing pagerank</li>
|
|
<li>OutputValueGroupingComparator: by checksum</li>
|
|
</ul>
|
|
</li>
|
|
|
|
<li>
|
|
<h4 id="Reduce">Reduce</h4>
|
|
|
|
<p>In this phase the
|
|
{@link #reduce(Object, Iterator, OutputCollector, Reporter)}
|
|
method is called for each <code>&lt;key, (list of values)&gt;</code> pair in
|
|
the grouped inputs.</p>
|
|
<p>The output of the reduce task is typically written to the
|
|
{@link FileSystem} via
|
|
{@link OutputCollector#collect(Object, Object)}.</p>
|
|
</li>
|
|
</ol>
|
|
|
|
<p>The output of the <code>Reducer</code> is <b>not re-sorted</b>.</p>
|
|
|
|
<p>Example:</p>
|
|
<p><blockquote><pre>
|
|
public class MyReducer&lt;K extends WritableComparable, V extends Writable&gt;
|
|
extends MapReduceBase implements Reducer&lt;K, V, K, V&gt; {
|
|
|
|
static enum MyCounters { NUM_RECORDS }
|
|
|
|
private String reduceTaskId;
|
|
private int noKeys = 0;
|
|
|
|
public void configure(JobConf job) {
|
|
reduceTaskId = job.get(JobContext.TASK_ATTEMPT_ID);
|
|
}
|
|
|
|
public void reduce(K key, Iterator&lt;V&gt; values,
|
|
OutputCollector&lt;K, V&gt; output,
|
|
Reporter reporter)
|
|
throws IOException {
|
|
|
|
// Process
|
|
int noValues = 0;
|
|
while (values.hasNext()) {
|
|
V value = values.next();
|
|
|
|
// Increment the no. of values for this key
|
|
++noValues;
|
|
|
|
// Process the &lt;key, value&gt; pair (assume this takes a while)
|
|
// ...
|
|
// ...
|
|
|
|
// Let the framework know that we are alive, and kicking!
|
|
if ((noValues%10) == 0) {
|
|
reporter.progress();
|
|
}
|
|
|
|
// Process some more
|
|
// ...
|
|
// ...
|
|
|
|
// Output the &lt;key, value&gt;
|
|
output.collect(key, value);
|
|
}
|
|
|
|
// Increment the no. of &lt;key, list of values&gt; pairs processed
|
|
++noKeys;
|
|
|
|
// Increment counters
|
|
reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
|
|
|
|
// Every 100 keys update application-level status
|
|
if ((noKeys%100) == 0) {
|
|
reporter.setStatus(reduceTaskId + " processed " + noKeys);
|
|
}
|
|
}
|
|
}
|
|
</pre></blockquote></p>
|
|
|
|
@see Mapper
|
|
@see Partitioner
|
|
@see Reporter
|
|
@see MapReduceBase]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.Reducer -->
|
|
<!-- start interface org.apache.hadoop.mapred.Reporter -->
|
|
<interface name="Reporter" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Progressable"/>
|
|
<method name="setStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="status" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the status description for the task.
|
|
|
|
@param status brief description of the current status.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.Enum"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Counter} of the given group with the given name.
|
|
|
|
@param name counter name
|
|
@return the <code>Counter</code> of the given group/name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="group" type="java.lang.String"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Counter} of the given group with the given name.
|
|
|
|
@param group counter group
|
|
@param name counter name
|
|
@return the <code>Counter</code> of the given group/name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="incrCounter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Enum"/>
|
|
<param name="amount" type="long"/>
|
|
<doc>
|
|
<![CDATA[Increments the counter identified by the key, which can be of
|
|
any {@link Enum} type, by the specified amount.
|
|
|
|
@param key key to identify the counter to be incremented. The key can be
|
|
any <code>Enum</code>.
|
|
@param amount A non-negative amount by which the counter is to
|
|
be incremented.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="incrCounter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="group" type="java.lang.String"/>
|
|
<param name="counter" type="java.lang.String"/>
|
|
<param name="amount" type="long"/>
|
|
<doc>
|
|
<![CDATA[Increments the counter identified by the group and counter name
|
|
by the specified amount.
|
|
|
|
@param group name to identify the group of the counter to be incremented.
|
|
@param counter name to identify the counter within the group.
|
|
@param amount A non-negative amount by which the counter is to
|
|
be incremented.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputSplit" return="org.apache.hadoop.mapred.InputSplit"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="UnsupportedOperationException" type="java.lang.UnsupportedOperationException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link InputSplit} object for a map.
|
|
|
|
@return the <code>InputSplit</code> that the map is reading from.
|
|
@throws UnsupportedOperationException if called outside a mapper]]>
|
|
</doc>
|
|
</method>
|
|
<field name="NULL" type="org.apache.hadoop.mapred.Reporter"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[A constant of Reporter type that does nothing.]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A facility for Map-Reduce applications to report progress and update
|
|
counters, status information etc.
|
|
|
|
<p>{@link Mapper} and {@link Reducer} can use the <code>Reporter</code>
|
|
provided to report progress or just indicate that they are alive. In
|
|
scenarios where the application takes a significant amount of time to
|
|
process individual key/value pairs, this is crucial since the framework
|
|
might assume that the task has timed-out and kill that task.</p>
|
|
|
|
<p>Applications can also update {@link Counters} via the provided
|
|
<code>Reporter</code> .</p>
|
|
|
|
@see Progressable
|
|
@see Counters]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.Reporter -->
|
|
<!-- start interface org.apache.hadoop.mapred.RunningJob -->
|
|
<interface name="RunningJob" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getConfiguration" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the underlying job configuration
|
|
|
|
@return the configuration of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getID" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the job identifier.
|
|
|
|
@return the job identifier.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobID" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="This method is deprecated and will be removed. Applications should
|
|
rather use {@link #getID()}.">
|
|
<doc>
|
|
<![CDATA[@deprecated This method is deprecated and will be removed. Applications should
|
|
rather use {@link #getID()}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the name of the job.
|
|
|
|
@return the name of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobFile" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the path of the submitted job configuration.
|
|
|
|
@return the path of the submitted job configuration.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTrackingURL" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the URL where some job progress information will be displayed.
|
|
|
|
@return the URL where some job progress information will be displayed.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="mapProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the <i>progress</i> of the job's map-tasks, as a float between 0.0
|
|
and 1.0. When all map tasks have completed, the function returns 1.0.
|
|
|
|
@return the progress of the job's map-tasks.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reduceProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the <i>progress</i> of the job's reduce-tasks, as a float between 0.0
|
|
and 1.0. When all reduce tasks have completed, the function returns 1.0.
|
|
|
|
@return the progress of the job's reduce-tasks.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanupProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the <i>progress</i> of the job's cleanup-tasks, as a float between 0.0
|
|
and 1.0. When all cleanup tasks have completed, the function returns 1.0.
|
|
|
|
@return the progress of the job's cleanup-tasks.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the <i>progress</i> of the job's setup-tasks, as a float between 0.0
|
|
and 1.0. When all setup tasks have completed, the function returns 1.0.
|
|
|
|
@return the progress of the job's setup-tasks.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isComplete" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Check if the job is finished or not.
|
|
This is a non-blocking call.
|
|
|
|
@return <code>true</code> if the job is complete, else <code>false</code>.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isSuccessful" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Check if the job completed successfully.
|
|
|
|
@return <code>true</code> if the job succeeded, else <code>false</code>.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="waitForCompletion"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Blocks until the job is complete.
|
|
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobState" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Returns the current state of the Job.
|
|
{@link JobStatus}
|
|
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="killJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Kill the running job. Blocks until all job tasks have been
|
|
killed as well. If the job is no longer running, it simply returns.
|
|
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobPriority"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="priority" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Set the priority of a running job.
|
|
@param priority the new priority for the job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskCompletionEvents" return="org.apache.hadoop.mapred.TaskCompletionEvent[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="startFrom" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get events indicating completion (success/failure) of component tasks.
|
|
|
|
@param startFrom index to start fetching events from
|
|
@return an array of {@link TaskCompletionEvent}s
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="killTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="shouldFail" type="boolean"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Kill indicated task attempt.
|
|
|
|
@param taskId the id of the task to be terminated.
|
|
@param shouldFail if true the task is failed and added to failed tasks
|
|
list, otherwise it is just killed, w/o affecting
|
|
job failure status.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="killTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Applications should rather use {@link #killTask(TaskAttemptID, boolean)}">
|
|
<param name="taskId" type="java.lang.String"/>
|
|
<param name="shouldFail" type="boolean"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@deprecated Applications should rather use {@link #killTask(TaskAttemptID, boolean)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounters" return="org.apache.hadoop.mapred.Counters"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets the counters for this job.
|
|
|
|
@return the counters for this job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskDiagnostics" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets the diagnostic messages for a given task attempt.
|
|
@param taskid the id of the task attempt
|
|
@return the list of diagnostic messages for the task
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getHistoryUrl" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the url where history file is archived. Returns empty string if
|
|
history file is not available yet.
|
|
|
|
@return the url where history file is archived
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isRetired" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Check whether the job has been removed from JobTracker memory and retired.
|
|
On retire, the job history file is copied to a location known by
|
|
{@link #getHistoryUrl()}
|
|
@return <code>true</code> if the job retired, else <code>false</code>.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>RunningJob</code> is the user-interface to query for details on a
|
|
running Map-Reduce job.
|
|
|
|
<p>Clients can get hold of <code>RunningJob</code> via the {@link JobClient}
|
|
and then query the running-job for details such as name, configuration,
|
|
progress etc.</p>
|
|
|
|
@see JobClient]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.RunningJob -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat -->
|
|
<class name="SequenceFileAsBinaryInputFormat" extends="org.apache.hadoop.mapred.SequenceFileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileAsBinaryInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[InputFormat reading keys, values from SequenceFiles in binary (raw)
|
|
format.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader -->
|
|
<class name="SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<constructor name="SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="createKey" return="org.apache.hadoop.io.BytesWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="createValue" return="org.apache.hadoop.io.BytesWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getKeyClassName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Retrieve the name of the key class for this SequenceFile.
|
|
@see org.apache.hadoop.io.SequenceFile.Reader#getKeyClassName]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getValueClassName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Retrieve the name of the value class for this SequenceFile.
|
|
@see org.apache.hadoop.io.SequenceFile.Reader#getValueClassName]]>
|
|
</doc>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.BytesWritable"/>
|
|
<param name="val" type="org.apache.hadoop.io.BytesWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read raw bytes from a SequenceFile.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Return the progress within the input split
|
|
@return 0.0 to 1.0 of the input byte range]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Read records from a SequenceFile as binary (raw) bytes.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat -->
|
|
<class name="SequenceFileAsBinaryOutputFormat" extends="org.apache.hadoop.mapred.SequenceFileOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileAsBinaryOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setSequenceFileOutputKeyClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the key class for the {@link SequenceFile}
|
|
<p>This allows the user to specify the key class to be different
|
|
from the actual class ({@link BytesWritable}) used for writing </p>
|
|
|
|
@param conf the {@link JobConf} to modify
|
|
@param theClass the SequenceFile output key class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSequenceFileOutputValueClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the value class for the {@link SequenceFile}
|
|
<p>This allows the user to specify the value class to be different
|
|
from the actual class ({@link BytesWritable}) used for writing </p>
|
|
|
|
@param conf the {@link JobConf} to modify
|
|
@param theClass the SequenceFile output value class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSequenceFileOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the key class for the {@link SequenceFile}
|
|
|
|
@return the key class of the {@link SequenceFile}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSequenceFileOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the value class for the {@link SequenceFile}
|
|
|
|
@return the value class of the {@link SequenceFile}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link OutputFormat} that writes keys, values to
|
|
{@link SequenceFile}s in binary(raw) format]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat.WritableValueBytes -->
|
|
<class name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" extends="org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat.WritableValueBytes"
|
|
abstract="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileAsBinaryOutputFormat.WritableValueBytes"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[Inner class used for appendRaw]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat.WritableValueBytes -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileAsTextInputFormat -->
|
|
<class name="SequenceFileAsTextInputFormat" extends="org.apache.hadoop.mapred.SequenceFileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileAsTextInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class is similar to SequenceFileInputFormat,
|
|
except it generates SequenceFileAsTextRecordReader
|
|
which converts the input keys and values to their
|
|
String forms by calling toString() method.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileAsTextInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileAsTextRecordReader -->
|
|
<class name="SequenceFileAsTextRecordReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<constructor name="SequenceFileAsTextRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="createKey" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="createValue" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.Text"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read key/value pair in a line.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class converts the input keys and values to their String forms by calling toString()
|
|
method. This class to SequenceFileAsTextInputFormat class is as LineRecordReader
|
|
class to TextInputFormat class.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileAsTextRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter -->
|
|
<class name="SequenceFileInputFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFilter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create a record reader for the given split
|
|
@param split file split
|
|
@param job job configuration
|
|
@param reporter reporter who sends report to task tracker
|
|
@return RecordReader]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setFilterClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="filterClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[set the filter class
|
|
|
|
@param conf application configuration
|
|
@param filterClass filter class]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A class that allows a map/red job to work on a sample of sequence files.
|
|
The sample is decided by the filter class set by the job.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter -->
|
|
<!-- start interface org.apache.hadoop.mapred.SequenceFileInputFilter.Filter -->
|
|
<interface name="SequenceFileInputFilter.Filter" abstract="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.Filter"/>
|
|
<doc>
|
|
<![CDATA[filter interface]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.SequenceFileInputFilter.Filter -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase -->
|
|
<class name="SequenceFileInputFilter.FilterBase" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.FilterBase"
|
|
abstract="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.SequenceFileInputFilter.Filter"/>
|
|
<constructor name="SequenceFileInputFilter.FilterBase"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[base class for Filters]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.MD5Filter -->
|
|
<class name="SequenceFileInputFilter.MD5Filter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFilter.MD5Filter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setFrequency"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="frequency" type="int"/>
|
|
<doc>
|
|
<![CDATA[set the filtering frequency in configuration
|
|
|
|
@param conf configuration
|
|
@param frequency filtering frequency]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[configure the filter according to configuration
|
|
|
|
@param conf configuration]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Filtering method
|
|
If MD5(key) % frequency==0, return true; otherwise return false
|
|
@see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]>
|
|
</doc>
|
|
</method>
|
|
<field name="MD5_LEN" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class returns a set of records by examining the MD5 digest of its
|
|
key against a filtering frequency <i>f</i>. The filtering criteria is
|
|
MD5(key) % f == 0.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.MD5Filter -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.PercentFilter -->
|
|
<class name="SequenceFileInputFilter.PercentFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFilter.PercentFilter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setFrequency"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="frequency" type="int"/>
|
|
<doc>
|
|
<![CDATA[set the frequency and stores it in conf
|
|
@param conf configuration
|
|
@param frequency filtering frequency]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[configure the filter by checking the configuration
|
|
|
|
@param conf configuration]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Filtering method
|
|
If record# % frequency==0, return true; otherwise return false
|
|
@see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class returns a percentage of records
|
|
The percentage is determined by a filtering frequency <i>f</i> using
|
|
the criteria record# % f == 0.
|
|
For example, if the frequency is 10, one out of 10 records is returned.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.PercentFilter -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.RegexFilter -->
|
|
<class name="SequenceFileInputFilter.RegexFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFilter.RegexFilter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setPattern"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="regex" type="java.lang.String"/>
|
|
<exception name="PatternSyntaxException" type="java.util.regex.PatternSyntaxException"/>
|
|
</method>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[configure the Filter by checking the configuration]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Filtering method
|
|
If key matches the regex, return true; otherwise return false
|
|
@see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Records filter by matching key to regex]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.RegexFilter -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFormat -->
|
|
<class name="SequenceFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link InputFormat} for {@link SequenceFile}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileOutputFormat -->
|
|
<class name="SequenceFileOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getReaders" return="org.apache.hadoop.io.SequenceFile.Reader[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="dir" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Open the output generated by this format.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputCompressionType" return="org.apache.hadoop.io.SequenceFile.CompressionType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link CompressionType} for the output {@link SequenceFile}.
|
|
@param conf the {@link JobConf}
|
|
@return the {@link CompressionType} for the output {@link SequenceFile},
|
|
defaulting to {@link CompressionType#RECORD}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputCompressionType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="style" type="org.apache.hadoop.io.SequenceFile.CompressionType"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link CompressionType} for the output {@link SequenceFile}.
|
|
@param conf the {@link JobConf} to modify
|
|
@param style the {@link CompressionType} for the output
|
|
{@link SequenceFile}]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link OutputFormat} that writes {@link SequenceFile}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileRecordReader -->
|
|
<class name="SequenceFileRecordReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<constructor name="SequenceFileRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="getKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The class of key that must be passed to {@link
|
|
#next(Object, Object)}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The class of value that must be passed to {@link
|
|
#next(Object, Object)}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createKey" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="createValue" return="V"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getCurrentValue"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Return the progress within the input split
|
|
@return 0.0 to 1.0 of the input byte range]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="seek"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="pos" type="long"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="conf" type="org.apache.hadoop.conf.Configuration"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A {@link RecordReader} for {@link SequenceFile}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.SkipBadRecords -->
|
|
<class name="SkipBadRecords" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SkipBadRecords"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getAttemptsToStartSkipping" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Get the number of Task attempts AFTER which skip mode
|
|
will be kicked off. When skip mode is kicked off, the
|
|
task reports the range of records which it will process
|
|
next to the TaskTracker. So that on failures, TT knows which
|
|
ones are possibly the bad records. On further executions,
|
|
those are skipped.
|
|
Default value is 2.
|
|
|
|
@param conf the configuration
|
|
@return attemptsToStartSkipping no of task attempts]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setAttemptsToStartSkipping"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="attemptsToStartSkipping" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the number of Task attempts AFTER which skip mode
|
|
will be kicked off. When skip mode is kicked off, the
|
|
task reports the range of records which it will process
|
|
next to the TaskTracker. So that on failures, TT knows which
|
|
ones are possibly the bad records. On further executions,
|
|
those are skipped.
|
|
Default value is 2.
|
|
|
|
@param conf the configuration
|
|
@param attemptsToStartSkipping no of task attempts]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getAutoIncrMapperProcCount" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Get the flag which if set to true,
|
|
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS} is incremented
|
|
by MapRunner after invoking the map function. This value must be set to
|
|
false for applications which process the records asynchronously
|
|
or buffer the input records. For example streaming.
|
|
In such cases applications should increment this counter on their own.
|
|
Default value is true.
|
|
|
|
@param conf the configuration
|
|
@return <code>true</code> if auto increment
|
|
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}.
|
|
<code>false</code> otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setAutoIncrMapperProcCount"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="autoIncr" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set the flag which if set to true,
|
|
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS} is incremented
|
|
by MapRunner after invoking the map function. This value must be set to
|
|
false for applications which process the records asynchronously
|
|
or buffer the input records. For example streaming.
|
|
In such cases applications should increment this counter on their own.
|
|
Default value is true.
|
|
|
|
@param conf the configuration
|
|
@param autoIncr whether to auto increment
|
|
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getAutoIncrReducerProcCount" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Get the flag which if set to true,
|
|
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS} is incremented
|
|
by framework after invoking the reduce function. This value must be set to
|
|
false for applications which process the records asynchronously
|
|
or buffer the input records. For example streaming.
|
|
In such cases applications should increment this counter on their own.
|
|
Default value is true.
|
|
|
|
@param conf the configuration
|
|
@return <code>true</code> if auto increment
|
|
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}.
|
|
<code>false</code> otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setAutoIncrReducerProcCount"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="autoIncr" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set the flag which if set to true,
|
|
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS} is incremented
|
|
by framework after invoking the reduce function. This value must be set to
|
|
false for applications which process the records asynchronously
|
|
or buffer the input records. For example streaming.
|
|
In such cases applications should increment this counter on their own.
|
|
Default value is true.
|
|
|
|
@param conf the configuration
|
|
@param autoIncr whether to auto increment
|
|
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSkipOutputPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Get the directory to which skipped records are written. By default it is
|
|
the sub directory of the output _logs directory.
|
|
User can stop writing skipped records by setting the value null.
|
|
|
|
@param conf the configuration.
|
|
@return path skip output directory. Null is returned if this is not set
|
|
and output directory is also not set.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSkipOutputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Set the directory to which skipped records are written. By default it is
|
|
the sub directory of the output _logs directory.
|
|
User can stop writing skipped records by setting the value null.
|
|
|
|
@param conf the configuration.
|
|
@param path skip output directory path]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapperMaxSkipRecords" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Get the number of acceptable skip records surrounding the bad record PER
|
|
bad record in mapper. The number includes the bad record as well.
|
|
To turn the feature of detection/skipping of bad records off, set the
|
|
value to 0.
|
|
The framework tries to narrow down the skipped range by retrying
|
|
until this threshold is met OR all attempts get exhausted for this task.
|
|
Set the value to Long.MAX_VALUE to indicate that framework need not try to
|
|
narrow down. Whatever records(depends on application) get skipped are
|
|
acceptable.
|
|
Default value is 0.
|
|
|
|
@param conf the configuration
|
|
@return maxSkipRecs acceptable skip records.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapperMaxSkipRecords"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="maxSkipRecs" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the number of acceptable skip records surrounding the bad record PER
|
|
bad record in mapper. The number includes the bad record as well.
|
|
To turn the feature of detection/skipping of bad records off, set the
|
|
value to 0.
|
|
The framework tries to narrow down the skipped range by retrying
|
|
until this threshold is met OR all attempts get exhausted for this task.
|
|
Set the value to Long.MAX_VALUE to indicate that framework need not try to
|
|
narrow down. Whatever records(depends on application) get skipped are
|
|
acceptable.
|
|
Default value is 0.
|
|
|
|
@param conf the configuration
|
|
@param maxSkipRecs acceptable skip records.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReducerMaxSkipGroups" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Get the number of acceptable skip groups surrounding the bad group PER
|
|
bad group in reducer. The number includes the bad group as well.
|
|
To turn the feature of detection/skipping of bad groups off, set the
|
|
value to 0.
|
|
The framework tries to narrow down the skipped range by retrying
|
|
until this threshold is met OR all attempts get exhausted for this task.
|
|
Set the value to Long.MAX_VALUE to indicate that framework need not try to
|
|
narrow down. Whatever groups(depends on application) get skipped are
|
|
acceptable.
|
|
Default value is 0.
|
|
|
|
@param conf the configuration
|
|
@return maxSkipGrps acceptable skip groups.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setReducerMaxSkipGroups"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="maxSkipGrps" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the number of acceptable skip groups surrounding the bad group PER
|
|
bad group in reducer. The number includes the bad group as well.
|
|
To turn the feature of detection/skipping of bad groups off, set the
|
|
value to 0.
|
|
The framework tries to narrow down the skipped range by retrying
|
|
until this threshold is met OR all attempts get exhausted for this task.
|
|
Set the value to Long.MAX_VALUE to indicate that framework need not try to
|
|
narrow down. Whatever groups(depends on application) get skipped are
|
|
acceptable.
|
|
Default value is 0.
|
|
|
|
@param conf the configuration
|
|
@param maxSkipGrps acceptable skip groups.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="COUNTER_GROUP" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Special counters which are written by the application and are
|
|
used by the framework for detecting bad records. For detecting bad records
|
|
these counters must be incremented by the application.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="COUNTER_MAP_PROCESSED_RECORDS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Number of processed map records.
|
|
@see SkipBadRecords#getAutoIncrMapperProcCount(Configuration)]]>
|
|
</doc>
|
|
</field>
|
|
<field name="COUNTER_REDUCE_PROCESSED_GROUPS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Number of processed reduce groups.
|
|
@see SkipBadRecords#getAutoIncrReducerProcCount(Configuration)]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Utility class for skip bad records functionality. It contains various
|
|
settings related to skipping of bad records.
|
|
|
|
<p>Hadoop provides an optional mode of execution in which the bad records
|
|
are detected and skipped in further attempts.
|
|
|
|
<p>This feature can be used when map/reduce tasks crash deterministically on
|
|
certain input. This happens due to bugs in the map/reduce function. The usual
|
|
course would be to fix these bugs. But sometimes this is not possible;
|
|
perhaps the bug is in third party libraries for which the source code is
|
|
not available. Due to this, the task never reaches completion even with
|
|
multiple attempts and complete data for that task is lost.</p>
|
|
|
|
<p>With this feature, only a small portion of data is lost surrounding
|
|
the bad record, which may be acceptable for some user applications.
|
|
see {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}</p>
|
|
|
|
<p>The skipping mode gets kicked off after a certain number of failures
|
|
see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}</p>
|
|
|
|
<p>In the skipping mode, the map/reduce task maintains the record range which
|
|
is getting processed at all times. Before giving the input to the
|
|
map/reduce function, it sends this record range to the Task tracker.
|
|
If task crashes, the Task tracker knows which one was the last reported
|
|
range. On further attempts that range gets skipped.</p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SkipBadRecords -->
|
|
<!-- start interface org.apache.hadoop.mapred.TaskAttemptContext -->
|
|
<interface name="TaskAttemptContext" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<method name="getTaskAttemptID" return="org.apache.hadoop.mapred.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getProgressible" return="org.apache.hadoop.util.Progressable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobConf" return="org.apache.hadoop.mapred.JobConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.TaskAttemptContext -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskAttemptID -->
|
|
<class name="TaskAttemptID" extends="org.apache.hadoop.mapreduce.TaskAttemptID"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskAttemptID" type="org.apache.hadoop.mapred.TaskID, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskAttemptID object from given {@link TaskID}.
|
|
@param taskId TaskID that this task belongs to
|
|
@param id the task attempt number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskAttemptID" type="java.lang.String, int, boolean, int, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link #TaskAttemptID(String, int, TaskType, int, int)}.">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskAttemptID object from given parts.
|
|
@param jtIdentifier jobTracker identifier
|
|
@param jobId job number
|
|
@param isMap whether the tip is a map
|
|
@param taskId taskId number
|
|
@param id the task attempt number
|
|
@deprecated Use {@link #TaskAttemptID(String, int, TaskType, int, int)}.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskAttemptID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskAttemptID object from given parts.
|
|
@param jtIdentifier jobTracker identifier
|
|
@param jobId job number
|
|
@param type the TaskType
|
|
@param taskId taskId number
|
|
@param id the task attempt number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskAttemptID"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="downgrade" return="org.apache.hadoop.mapred.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="old" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
|
|
<doc>
|
|
<![CDATA[Downgrade a new TaskAttemptID to an old one
|
|
@param old the new id
|
|
@return either old or a new TaskAttemptID constructed to match old]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskID" return="org.apache.hadoop.mapred.TaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobID" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="read" return="org.apache.hadoop.mapred.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="forName" return="org.apache.hadoop.mapred.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="str" type="java.lang.String"/>
|
|
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
|
|
<doc>
|
|
<![CDATA[Construct a TaskAttemptID object from given string
|
|
@return constructed TaskAttemptID object or null if the given String is null
|
|
@throws IllegalArgumentException if the given string is malformed]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskAttemptIDsPattern" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jtIdentifier" type="java.lang.String"/>
|
|
<param name="jobId" type="java.lang.Integer"/>
|
|
<param name="isMap" type="java.lang.Boolean"/>
|
|
<param name="taskId" type="java.lang.Integer"/>
|
|
<param name="attemptId" type="java.lang.Integer"/>
|
|
<doc>
|
|
<![CDATA[Returns a regex pattern which matches task attempt IDs. Arguments can
|
|
be given null, in which case that part of the regex will be generic.
|
|
For example to obtain a regex matching <i>all task attempt IDs</i>
|
|
of <i>any jobtracker</i>, in <i>any job</i>, of the <i>first
|
|
map task</i>, we would use :
|
|
<pre>
|
|
TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
|
|
</pre>
|
|
which will return :
|
|
<pre> "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" </pre>
|
|
@param jtIdentifier jobTracker identifier, or null
|
|
@param jobId job number, or null
|
|
@param isMap whether the tip is a map, or null
|
|
@param taskId taskId number, or null
|
|
@param attemptId the task attempt number, or null
|
|
@return a regex pattern matching TaskAttemptIDs]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskAttemptIDsPattern" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jtIdentifier" type="java.lang.String"/>
|
|
<param name="jobId" type="java.lang.Integer"/>
|
|
<param name="type" type="org.apache.hadoop.mapreduce.TaskType"/>
|
|
<param name="taskId" type="java.lang.Integer"/>
|
|
<param name="attemptId" type="java.lang.Integer"/>
|
|
<doc>
|
|
<![CDATA[Returns a regex pattern which matches task attempt IDs. Arguments can
|
|
be given null, in which case that part of the regex will be generic.
|
|
For example to obtain a regex matching <i>all task attempt IDs</i>
|
|
of <i>any jobtracker</i>, in <i>any job</i>, of the <i>first
|
|
map task</i>, we would use :
|
|
<pre>
|
|
TaskAttemptID.getTaskAttemptIDsPattern(null, null, TaskType.MAP, 1, null);
|
|
</pre>
|
|
which will return :
|
|
<pre> "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" </pre>
|
|
@param jtIdentifier jobTracker identifier, or null
|
|
@param jobId job number, or null
|
|
@param type the {@link TaskType}
|
|
@param taskId taskId number, or null
|
|
@param attemptId the task attempt number, or null
|
|
@return a regex pattern matching TaskAttemptIDs]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[TaskAttemptID represents the immutable and unique identifier for
|
|
a task attempt. Each task attempt is one particular instance of a Map or
|
|
Reduce Task identified by its TaskID.
|
|
|
|
TaskAttemptID consists of 2 parts. First part is the
|
|
{@link TaskID}, that this TaskAttemptID belongs to.
|
|
Second part is the task attempt number. <br>
|
|
An example TaskAttemptID is :
|
|
<code>attempt_200707121733_0003_m_000005_0</code> , which represents the
|
|
zeroth task attempt for the fifth map task in the third job
|
|
running at the jobtracker started at <code>200707121733</code>.
|
|
<p>
|
|
Applications should never construct or parse TaskAttemptID strings
|
|
, but rather use appropriate constructors or {@link #forName(String)}
|
|
method.
|
|
|
|
@see JobID
|
|
@see TaskID]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskAttemptID -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskCompletionEvent -->
|
|
<class name="TaskCompletionEvent" extends="org.apache.hadoop.mapreduce.TaskCompletionEvent"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskCompletionEvent"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Default constructor for Writable.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskCompletionEvent" type="int, org.apache.hadoop.mapred.TaskAttemptID, int, boolean, org.apache.hadoop.mapred.TaskCompletionEvent.Status, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructor. eventId should be created externally and incremented
|
|
per event for each job.
|
|
@param eventId event id, event id should be unique and assigned
|
|
incrementally, starting from 0.
|
|
@param taskId task id
|
|
@param status task's status
|
|
@param taskTrackerHttp task tracker's host:port for http.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getTaskId" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="use {@link #getTaskAttemptId()} instead.">
|
|
<doc>
|
|
<![CDATA[Returns task id.
|
|
@return task id
|
|
@deprecated use {@link #getTaskAttemptId()} instead.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskAttemptId" return="org.apache.hadoop.mapred.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns task id.
|
|
@return task id]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskStatus" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns enum Status.SUCCESS or Status.FAILURE.
|
|
@return task tracker status]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskId"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="use {@link #setTaskAttemptId(TaskAttemptID)} instead.">
|
|
<param name="taskId" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Sets task id.
|
|
@param taskId
|
|
@deprecated use {@link #setTaskAttemptId(TaskAttemptID)} instead.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskAttemptId"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<doc>
|
|
<![CDATA[Sets task id.
|
|
@param taskId]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="status" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"/>
|
|
<doc>
|
|
<![CDATA[Set task status.
|
|
@param status]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskRunTime"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="taskCompletionTime" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the task completion time
|
|
@param taskCompletionTime time (in millisec) the task took to complete]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setEventId"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="eventId" type="int"/>
|
|
<doc>
|
|
<![CDATA[set event Id. should be assigned incrementally starting from 0.
|
|
@param eventId]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskTrackerHttp"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="taskTrackerHttp" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set task tracker http location.
|
|
@param taskTrackerHttp]]>
|
|
</doc>
|
|
</method>
|
|
<field name="EMPTY_ARRAY" type="org.apache.hadoop.mapred.TaskCompletionEvent[]"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This is used to track task completion events on
|
|
job tracker.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskCompletionEvent -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskCompletionEvent.Status -->
|
|
<class name="TaskCompletionEvent.Status" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskCompletionEvent.Status -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskID -->
|
|
<class name="TaskID" extends="org.apache.hadoop.mapreduce.TaskID"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, boolean, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link #TaskID(String, int, TaskType, int)}">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskID object from given {@link JobID}.
|
|
@param jobId JobID that this tip belongs to
|
|
@param isMap whether the tip is a map
|
|
@param id the tip number
|
|
@deprecated Use {@link #TaskID(String, int, TaskType, int)}]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskID" type="java.lang.String, int, boolean, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link #TaskID(org.apache.hadoop.mapreduce.JobID, TaskType,
|
|
int)}">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskInProgressId object from given parts.
|
|
@param jtIdentifier jobTracker identifier
|
|
@param jobId job number
|
|
@param isMap whether the tip is a map
|
|
@param id the tip number
|
|
@deprecated Use {@link #TaskID(org.apache.hadoop.mapreduce.JobID, TaskType,
|
|
int)}]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, org.apache.hadoop.mapreduce.TaskType, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskID object from given {@link JobID}.
|
|
@param jobId JobID that this tip belongs to
|
|
@param type the {@link TaskType}
|
|
@param id the tip number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskInProgressId object from given parts.
|
|
@param jtIdentifier jobTracker identifier
|
|
@param jobId job number
|
|
@param type the {@link TaskType}
|
|
@param id the tip number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskID"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="downgrade" return="org.apache.hadoop.mapred.TaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="old" type="org.apache.hadoop.mapreduce.TaskID"/>
|
|
<doc>
|
|
<![CDATA[Downgrade a new TaskID to an old one
|
|
@param old a new or old TaskID
|
|
@return either old or a new TaskID built to match old]]>
|
|
</doc>
|
|
</method>
|
|
<method name="read" return="org.apache.hadoop.mapred.TaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getJobID" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getTaskIDsPattern" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="Use {@link TaskID#getTaskIDsPattern(String, Integer, TaskType,
|
|
Integer)}">
|
|
<param name="jtIdentifier" type="java.lang.String"/>
|
|
<param name="jobId" type="java.lang.Integer"/>
|
|
<param name="isMap" type="java.lang.Boolean"/>
|
|
<param name="taskId" type="java.lang.Integer"/>
|
|
<doc>
|
|
<![CDATA[Returns a regex pattern which matches task IDs. Arguments can
|
|
be given null, in which case that part of the regex will be generic.
|
|
For example to obtain a regex matching <i>the first map task</i>
|
|
of <i>any jobtracker</i>, of <i>any job</i>, we would use :
|
|
<pre>
|
|
TaskID.getTaskIDsPattern(null, null, true, 1);
|
|
</pre>
|
|
which will return :
|
|
<pre> "task_[^_]*_[0-9]*_m_000001*" </pre>
|
|
@param jtIdentifier jobTracker identifier, or null
|
|
@param jobId job number, or null
|
|
@param isMap whether the tip is a map, or null
|
|
@param taskId taskId number, or null
|
|
@return a regex pattern matching TaskIDs
|
|
@deprecated Use {@link TaskID#getTaskIDsPattern(String, Integer, TaskType,
|
|
Integer)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskIDsPattern" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jtIdentifier" type="java.lang.String"/>
|
|
<param name="jobId" type="java.lang.Integer"/>
|
|
<param name="type" type="org.apache.hadoop.mapreduce.TaskType"/>
|
|
<param name="taskId" type="java.lang.Integer"/>
|
|
<doc>
|
|
<![CDATA[Returns a regex pattern which matches task IDs. Arguments can
|
|
be given null, in which case that part of the regex will be generic.
|
|
For example to obtain a regex matching <i>the first map task</i>
|
|
of <i>any jobtracker</i>, of <i>any job</i>, we would use :
|
|
<pre>
|
|
TaskID.getTaskIDsPattern(null, null, TaskType.MAP, 1);
|
|
</pre>
|
|
which will return :
|
|
<pre> "task_[^_]*_[0-9]*_m_000001*" </pre>
|
|
@param jtIdentifier jobTracker identifier, or null
|
|
@param jobId job number, or null
|
|
@param type the {@link TaskType}, or null
|
|
@param taskId taskId number, or null
|
|
@return a regex pattern matching TaskIDs]]>
|
|
</doc>
|
|
</method>
|
|
<method name="forName" return="org.apache.hadoop.mapred.TaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="str" type="java.lang.String"/>
|
|
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[TaskID represents the immutable and unique identifier for
|
|
a Map or Reduce Task. Each TaskID encompasses multiple attempts made to
|
|
execute the Map or Reduce Task, each of which is uniquely identified by
|
|
their TaskAttemptID.
|
|
|
|
TaskID consists of 3 parts. First part is the {@link JobID}, that this
|
|
TaskInProgress belongs to. Second part of the TaskID is either 'm' or 'r'
|
|
representing whether the task is a map task or a reduce task.
|
|
And the third part is the task number. <br>
|
|
An example TaskID is :
|
|
<code>task_200707121733_0003_m_000005</code> , which represents the
|
|
fifth map task in the third job running at the jobtracker
|
|
started at <code>200707121733</code>.
|
|
<p>
|
|
Applications should never construct or parse TaskID strings
|
|
, but rather use appropriate constructors or {@link #forName(String)}
|
|
method.
|
|
|
|
@see JobID
|
|
@see TaskAttemptID]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskID -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskLog.Reader -->
|
|
<class name="TaskLog.Reader" extends="java.io.InputStream"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskLog.Reader" type="org.apache.hadoop.mapred.TaskAttemptID, org.apache.hadoop.mapred.TaskLog.LogName, long, long, boolean"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read a log file from start to end positions. The offsets may be negative,
|
|
in which case they are relative to the end of the file. For example,
|
|
Reader(taskid, kind, 0, -1) is the entire file and
|
|
Reader(taskid, kind, -4197, -1) is the last 4196 bytes.
|
|
@param taskid the id of the task to read the log file for
|
|
@param kind the kind of log to read
|
|
@param start the offset to read from (negative is relative to tail)
|
|
@param end the offset to read up to (negative is relative to tail)
|
|
@param isCleanup whether the attempt is cleanup attempt or not
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="read" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="read" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="buffer" type="byte[]"/>
|
|
<param name="offset" type="int"/>
|
|
<param name="length" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="available" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskLog.Reader -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskLogAppender -->
|
|
<class name="TaskLogAppender" extends="org.apache.log4j.FileAppender"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskLogAppender"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="activateOptions"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="append"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="event" type="org.apache.log4j.spi.LoggingEvent"/>
|
|
</method>
|
|
<method name="flush"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getTaskId" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Getter/Setter methods for log4j.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskId"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getTotalLogFileSize" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setTotalLogFileSize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="logSize" type="long"/>
|
|
</method>
|
|
<method name="setIsCleanup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="isCleanup" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the task is a cleanup attempt or not.
|
|
|
|
@param isCleanup
|
|
true if the task is cleanup attempt, false otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getIsCleanup" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get whether task is cleanup attempt or not.
|
|
|
|
@return true if the task is cleanup attempt, false otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A simple log4j-appender for the task child's
|
|
map-reduce system logs.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskLogAppender -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskReport -->
|
|
<class name="TaskReport" extends="org.apache.hadoop.mapreduce.TaskReport"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskReport"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getTaskID" return="org.apache.hadoop.mapred.TaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The id of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounters" return="org.apache.hadoop.mapred.Counters"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setSuccessfulAttempt"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="t" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<doc>
|
|
<![CDATA[set successful attempt ID of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSuccessfulTaskAttempt" return="org.apache.hadoop.mapred.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the attempt ID that took this task to completion]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setRunningTaskAttempts"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="runningAttempts" type="java.util.Collection"/>
|
|
<doc>
|
|
<![CDATA[set running attempt(s) of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRunningTaskAttempts" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the running task attempt IDs for this task]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setFinishTime"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="finishTime" type="long"/>
|
|
<doc>
|
|
<![CDATA[set finish time of task.
|
|
@param finishTime finish time of task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setStartTime"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="startTime" type="long"/>
|
|
<doc>
|
|
<![CDATA[set start time of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A report on the state of a task.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskReport -->
|
|
<!-- start class org.apache.hadoop.mapred.TextInputFormat -->
|
|
<class name="TextInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<constructor name="TextInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="isSplitable" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="file" type="org.apache.hadoop.fs.Path"/>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines.
|
|
Either linefeed or carriage-return are used to signal end of line. Keys are
|
|
the position in the file, and values are the line of text.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TextInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.TextOutputFormat -->
|
|
<class name="TextOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TextOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link OutputFormat} that writes plain text files.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TextOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.TextOutputFormat.LineRecordWriter -->
|
|
<class name="TextOutputFormat.LineRecordWriter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordWriter"/>
|
|
<constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="out" type="java.io.DataOutputStream"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TextOutputFormat.LineRecordWriter -->
|
|
<!-- start class org.apache.hadoop.mapred.Utils -->
|
|
<class name="Utils" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Utils"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[A utility class. It provides
|
|
A path filter utility to filter out output/part files in the output dir]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.Utils -->
|
|
<!-- start class org.apache.hadoop.mapred.Utils.OutputFileUtils -->
|
|
<class name="Utils.OutputFileUtils" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Utils.OutputFileUtils"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.Utils.OutputFileUtils -->
|
|
<!-- start class org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter -->
|
|
<class name="Utils.OutputFileUtils.OutputFilesFilter" extends="org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputLogFilter"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Utils.OutputFileUtils.OutputFilesFilter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class filters output(part) files from the given directory
|
|
It does not accept files with filenames _logs and _SUCCESS.
|
|
This can be used to list paths of output directory as follows:
|
|
Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir,
|
|
new OutputFilesFilter()));]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter -->
|
|
<!-- start class org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputLogFilter -->
|
|
<class name="Utils.OutputFileUtils.OutputLogFilter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.fs.PathFilter"/>
|
|
<constructor name="Utils.OutputFileUtils.OutputLogFilter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class filters log files from directory given
|
|
It doesn't accept paths having _logs.
|
|
This can be used to list paths of output directory as follows:
|
|
Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir,
|
|
new OutputLogFilter()));]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputLogFilter -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred.jobcontrol">
|
|
<!-- start class org.apache.hadoop.mapred.jobcontrol.Job -->
|
|
<class name="Job" extends="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Job" type="org.apache.hadoop.mapred.JobConf, java.util.ArrayList"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Construct a job.
|
|
@param jobConf a mapred job configuration representing a job to be executed.
|
|
@param dependingJobs a list of jobs the current job depends on]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="Job" type="org.apache.hadoop.mapred.JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="getAssignedJobID" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the mapred ID of this job as assigned by the
|
|
mapred framework.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setAssignedJobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="setAssignedJobID should not be called.
|
|
JOBID is set by the framework.">
|
|
<param name="mapredJobID" type="org.apache.hadoop.mapred.JobID"/>
|
|
<doc>
|
|
<![CDATA[@deprecated setAssignedJobID should not be called.
|
|
JOBID is set by the framework.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobConf" return="org.apache.hadoop.mapred.JobConf"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the mapred job conf of this job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobConf"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Set the mapred job conf for this job.
|
|
@param jobConf the mapred job conf for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getState" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the state of this job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobClient" return="org.apache.hadoop.mapred.JobClient"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the job client of this job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDependingJobs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the depending jobs of this job]]>
|
|
</doc>
|
|
</method>
|
|
<field name="SUCCESS" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="WAITING" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="RUNNING" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="READY" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FAILED" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="DEPENDENT_FAILED" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.jobcontrol.Job -->
|
|
<!-- start class org.apache.hadoop.mapred.jobcontrol.JobControl -->
|
|
<class name="JobControl" extends="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobControl" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a job control for a group of jobs.
|
|
@param groupName a name identifying this group]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getWaitingJobs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the jobs in the waiting state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRunningJobs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the jobs in the running state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReadyJobs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the jobs in the ready state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSuccessfulJobs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the jobs in the success state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getFailedJobs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="addJobs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobs" type="java.util.Collection"/>
|
|
<doc>
|
|
<![CDATA[Add a collection of jobs
|
|
|
|
@param jobs the collection of jobs to add]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getState" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the thread state]]>
|
|
</doc>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.jobcontrol.JobControl -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred.join">
|
|
<!-- start class org.apache.hadoop.mapred.join.ArrayListBackedIterator -->
|
|
<class name="ArrayListBackedIterator" extends="org.apache.hadoop.mapreduce.lib.join.ArrayListBackedIterator"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
|
|
<constructor name="ArrayListBackedIterator"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="ArrayListBackedIterator" type="java.util.ArrayList"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[This class provides an implementation of ResetableIterator. The
|
|
implementation uses an {@link java.util.ArrayList} to store elements
|
|
added to it, replaying them as requested.
|
|
Prefer {@link StreamBackedIterator}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.ArrayListBackedIterator -->
|
|
<!-- start interface org.apache.hadoop.mapred.join.ComposableInputFormat -->
|
|
<interface name="ComposableInputFormat" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.InputFormat"/>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.join.ComposableRecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Refinement of InputFormat requiring implementors to provide
|
|
ComposableRecordReader instead of RecordReader.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.join.ComposableInputFormat -->
|
|
<!-- start interface org.apache.hadoop.mapred.join.ComposableRecordReader -->
|
|
<interface name="ComposableRecordReader" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<implements name="java.lang.Comparable"/>
|
|
<method name="id" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the position in the collector this class occupies.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the key this RecordReader would supply on a call to next(K,V)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Clone the key at the head of this RecordReader into the object provided.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns true if the stream is not empty, but provides no guarantee that
|
|
a call to next(K,V) will succeed.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="skip"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/>
|
|
<param name="key" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[While key-value pairs from this RecordReader match the given key, register
|
|
them with the JoinCollector provided.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Additional operations required of a RecordReader to participate in a join.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.join.ComposableRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.join.CompositeInputFormat -->
|
|
<class name="CompositeInputFormat" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ComposableInputFormat"/>
|
|
<constructor name="CompositeInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setFormat"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Interpret a given string as a composite expression.
|
|
{@code
|
|
func ::= <ident>([<func>,]*<func>)
|
|
func ::= tbl(<class>,"<path>")
|
|
class ::= @see java.lang.Class#forName(java.lang.String)
|
|
path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String)
|
|
}
|
|
Reads expression from the <tt>mapred.join.expr</tt> property and
|
|
user-supplied join types from <tt>mapred.join.define.<ident></tt>
|
|
types. Paths supplied to <tt>tbl</tt> are given as input paths to the
|
|
InputFormat class listed.
|
|
@see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addDefaults"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Adds the default set of identifiers to the parser.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="numSplits" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Build a CompositeInputSplit from the child InputFormats by assigning the
|
|
ith split from each child to the ith composite split.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.join.ComposableRecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Construct a CompositeRecordReader for the children of this InputFormat
|
|
as defined in the init expression.
|
|
The outermost join need only be composable, not necessarily a composite.
|
|
Mandating TupleWritable isn't strictly correct.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compose" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="inf" type="java.lang.Class"/>
|
|
<param name="path" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Convenience method for constructing composite formats.
|
|
Given InputFormat class (inf), path (p) return:
|
|
{@code tbl(<inf>, <p>) }]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compose" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="op" type="java.lang.String"/>
|
|
<param name="inf" type="java.lang.Class"/>
|
|
<param name="path" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[Convenience method for constructing composite formats.
|
|
Given operation (op), Object class (inf), set of paths (p) return:
|
|
{@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compose" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="op" type="java.lang.String"/>
|
|
<param name="inf" type="java.lang.Class"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path[]"/>
|
|
<doc>
|
|
<![CDATA[Convenience method for constructing composite formats.
|
|
Given operation (op), Object class (inf), set of paths (p) return:
|
|
{@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An InputFormat capable of performing joins over a set of data sources sorted
|
|
and partitioned the same way.
|
|
@see #setFormat
|
|
|
|
A user may define new join types by setting the property
|
|
<tt>mapred.join.define.<ident></tt> to a classname. In the expression
|
|
<tt>mapred.join.expr</tt>, the identifier will be assumed to be a
|
|
ComposableRecordReader.
|
|
<tt>mapred.join.keycomparator</tt> can be a classname used to compare keys
|
|
in the join.
|
|
@see JoinRecordReader
|
|
@see MultiFilterRecordReader]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.CompositeInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.join.CompositeInputSplit -->
|
|
<class name="CompositeInputSplit" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.InputSplit"/>
|
|
<constructor name="CompositeInputSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="CompositeInputSplit" type="int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="s" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add an InputSplit to this collection.
|
|
@throws IOException If capacity was not specified during construction
|
|
or if capacity has been reached.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="get" return="org.apache.hadoop.mapred.InputSplit"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<doc>
|
|
<![CDATA[Get ith child InputSplit.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Return the aggregate length of all child InputSplits currently added.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the length of ith child InputSplit.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Collect a set of hosts from all child InputSplits.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocation" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[getLocations from ith InputSplit.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Write splits in the following format.
|
|
{@code
|
|
<count><class1><class2>...<classn><split1><split2>...<splitn>
|
|
}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}
|
|
@throws IOException If the child InputSplit cannot be read, typically
|
|
for failing access checks.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This InputSplit contains a set of child InputSplits. Any InputSplit inserted
|
|
into this collection must have a public default constructor.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.CompositeInputSplit -->
|
|
<!-- start class org.apache.hadoop.mapred.join.CompositeRecordReader -->
|
|
<class name="CompositeRecordReader" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.conf.Configurable"/>
|
|
<constructor name="CompositeRecordReader" type="int, int, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create a RecordReader with <tt>capacity</tt> children to position
|
|
<tt>id</tt> in the parent reader.
|
|
The id of a root CompositeRecordReader is -1 by convention, but relying
|
|
on this is not recommended.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="combine" return="boolean"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="srcs" type="java.lang.Object[]"/>
|
|
<param name="value" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
</method>
|
|
<method name="id" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the position in the collector this class occupies.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordReaderQueue" return="java.util.PriorityQueue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return sorted list of RecordReaders for this composite.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getComparator" return="org.apache.hadoop.io.WritableComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return comparator defining the ordering for RecordReaders in this
|
|
composite.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="rr" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add a RecordReader to this collection.
|
|
The id() of a RecordReader determines where in the Tuple its
|
|
entry will appear. Adding RecordReaders with the same id has
|
|
undefined behavior.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the key for the current join or the value at the top of the
|
|
RecordReader heap.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Clone the key at the top of this RR into the given object.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return true if it is possible that this could emit more values.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="skip"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Pass skip key to child RRs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Obtain an iterator over the child RRs apropos of the value type
|
|
ultimately emitted from this join.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/>
|
|
<param name="key" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[If key provided matches that of this Composite, give JoinCollector
|
|
iterator over values it may emit.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="fillJoinCollector"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="iterkey" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For all child RRs offering the key provided, obtain an iterator
|
|
at that position in the JoinCollector.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
|
|
<doc>
|
|
<![CDATA[Implement Comparable contract (compare key of join or head of heap
|
|
with that of another).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createKey" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a new key value common to all child RRs.
|
|
@throws ClassCastException if key classes differ.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createInternalValue" return="org.apache.hadoop.mapred.join.TupleWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a value to be used internally for joins.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Unsupported (returns zero in all cases).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close all child RRs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Report progress as the minimum of all child RR progress.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"
|
|
transient="false" volatile="false"
|
|
static="false" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="kids" type="org.apache.hadoop.mapred.join.ComposableRecordReader[]"
|
|
transient="false" volatile="false"
|
|
static="false" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A RecordReader that can effect joins of RecordReaders sharing a common key
|
|
type and partitioning.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.CompositeRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.join.InnerJoinRecordReader -->
|
|
<class name="InnerJoinRecordReader" extends="org.apache.hadoop.mapred.join.JoinRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="combine" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="srcs" type="java.lang.Object[]"/>
|
|
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<doc>
|
|
<![CDATA[Return true iff the tuple is full (all data sources contain this key).]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Full inner join.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.InnerJoinRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.join.JoinRecordReader -->
|
|
<class name="JoinRecordReader" extends="org.apache.hadoop.mapred.join.CompositeRecordReader"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
|
|
<constructor name="JoinRecordReader" type="int, org.apache.hadoop.mapred.JobConf, int, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Emit the next set of key, value pairs as defined by the child
|
|
RecordReaders and operation associated with this composite RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValue" return="org.apache.hadoop.mapred.join.TupleWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return an iterator wrapping the JoinCollector.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Base class for Composite joins returning Tuples of arbitrary Writables.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.JoinRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.join.JoinRecordReader.JoinDelegationIterator -->
|
|
<class name="JoinRecordReader.JoinDelegationIterator" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
|
|
<constructor name="JoinRecordReader.JoinDelegationIterator"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="item" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Since the JoinCollector is effecting our operation, we need only
|
|
provide an iterator proxy wrapping its operation.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.JoinRecordReader.JoinDelegationIterator -->
|
|
<!-- start class org.apache.hadoop.mapred.join.MultiFilterRecordReader -->
|
|
<class name="MultiFilterRecordReader" extends="org.apache.hadoop.mapred.join.CompositeRecordReader"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
|
|
<constructor name="MultiFilterRecordReader" type="int, org.apache.hadoop.mapred.JobConf, int, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="emit" return="V"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For each tuple emitted, return a value (typically one of the values
|
|
in the tuple).
|
|
Modifying the Writables in the tuple is permitted and unlikely to affect
|
|
join behavior in most cases, but it is not recommended. It's safer to
|
|
clone first.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="combine" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="srcs" type="java.lang.Object[]"/>
|
|
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<doc>
|
|
<![CDATA[Default implementation offers {@link #emit} every Tuple from the
|
|
collector (the outer join of child RRs).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValue" return="V"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return an iterator returning a single value from the tuple.
|
|
@see MultiFilterDelegationIterator]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Base class for Composite join returning values derived from multiple
|
|
sources, but generally not tuples.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.MultiFilterRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.join.MultiFilterRecordReader.MultiFilterDelegationIterator -->
|
|
<class name="MultiFilterRecordReader.MultiFilterDelegationIterator" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
|
|
<constructor name="MultiFilterRecordReader.MultiFilterDelegationIterator"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="item" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Proxy the JoinCollector, but include callback to emit.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.MultiFilterRecordReader.MultiFilterDelegationIterator -->
|
|
<!-- start class org.apache.hadoop.mapred.join.OuterJoinRecordReader -->
|
|
<class name="OuterJoinRecordReader" extends="org.apache.hadoop.mapred.join.JoinRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="combine" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="srcs" type="java.lang.Object[]"/>
|
|
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<doc>
|
|
<![CDATA[Emit everything from the collector.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Full outer join.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.OuterJoinRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.join.OverrideRecordReader -->
|
|
<class name="OverrideRecordReader" extends="org.apache.hadoop.mapred.join.MultiFilterRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="emit" return="V"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<doc>
|
|
<![CDATA[Emit the value with the highest position in the tuple.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="fillJoinCollector"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="iterkey" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Instead of filling the JoinCollector with iterators from all
|
|
data sources, fill only the rightmost for this key.
|
|
This not only saves space by discarding the other sources, but
|
|
it also emits the number of key-value pairs in the preferred
|
|
RecordReader instead of repeating that stream n times, where
|
|
n is the cardinality of the cross product of the discarded
|
|
streams for the given key.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Prefer the "rightmost" data source for this key.
|
|
For example, <tt>override(S1,S2,S3)</tt> will prefer values
|
|
from S3 over S2, and values from S2 over S1 for all keys
|
|
emitted from all sources.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.OverrideRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.join.Parser -->
|
|
<class name="Parser" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Parser"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[Very simple shift-reduce parser for join expressions.
|
|
|
|
This should be sufficient for the user extension permitted now, but ought to
|
|
be replaced with a parser generator if more complex grammars are supported.
|
|
In particular, this "shift-reduce" parser has no states. Each set
|
|
of formals requires a different internal node type, which is responsible for
|
|
interpreting the list of tokens it receives. This is sufficient for the
|
|
current grammar, but it has several annoying properties that might inhibit
|
|
extension. In particular, parentheses are always function calls; an
|
|
algebraic or filter grammar would not only require a node type, but must
|
|
also work around the internals of this parser.
|
|
|
|
For most other cases, adding classes to the hierarchy- particularly by
|
|
extending JoinRecordReader and MultiFilterRecordReader- is fairly
|
|
straightforward. One need only override the relevant method(s) (usually only
|
|
{@link CompositeRecordReader#combine}) and include a property to map its
|
|
value to an identifier in the parser.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.Parser -->
|
|
<!-- start class org.apache.hadoop.mapred.join.Parser.Node -->
|
|
<class name="Parser.Node" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ComposableInputFormat"/>
|
|
<constructor name="Parser.Node" type="java.lang.String"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="addIdentifier"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="ident" type="java.lang.String"/>
|
|
<param name="mcstrSig" type="java.lang.Class[]"/>
|
|
<param name="nodetype" type="java.lang.Class"/>
|
|
<param name="cl" type="java.lang.Class"/>
|
|
<exception name="NoSuchMethodException" type="java.lang.NoSuchMethodException"/>
|
|
<doc>
|
|
<![CDATA[For a given identifier, add a mapping to the nodetype for the parse
|
|
tree and to the ComposableRecordReader to be created, including the
|
|
formals required to invoke the constructor.
|
|
The nodetype and constructor signature should be filled in from the
|
|
child node.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="id" type="int"/>
|
|
</method>
|
|
<method name="setKeyComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="cmpcl" type="java.lang.Class"/>
|
|
</method>
|
|
<field name="rrCstrMap" type="java.util.Map"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="id" type="int"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="ident" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="cmpcl" type="java.lang.Class"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.Parser.Node -->
|
|
<!-- start class org.apache.hadoop.mapred.join.Parser.NodeToken -->
|
|
<class name="Parser.NodeToken" extends="org.apache.hadoop.mapred.join.Parser.Token"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getNode" return="org.apache.hadoop.mapred.join.Parser.Node"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.Parser.NodeToken -->
|
|
<!-- start class org.apache.hadoop.mapred.join.Parser.NumToken -->
|
|
<class name="Parser.NumToken" extends="org.apache.hadoop.mapred.join.Parser.Token"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Parser.NumToken" type="double"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getNum" return="double"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.Parser.NumToken -->
|
|
<!-- start class org.apache.hadoop.mapred.join.Parser.StrToken -->
|
|
<class name="Parser.StrToken" extends="org.apache.hadoop.mapred.join.Parser.Token"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Parser.StrToken" type="org.apache.hadoop.mapred.join.Parser.TType, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getStr" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.Parser.StrToken -->
|
|
<!-- start class org.apache.hadoop.mapred.join.Parser.Token -->
|
|
<class name="Parser.Token" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getType" return="org.apache.hadoop.mapred.join.Parser.TType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getNode" return="org.apache.hadoop.mapred.join.Parser.Node"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getNum" return="double"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getStr" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Tagged-union type for tokens from the join expression.
|
|
@see Parser.TType]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.Parser.Token -->
|
|
<!-- start class org.apache.hadoop.mapred.join.Parser.TType -->
|
|
<class name="Parser.TType" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapred.join.Parser.TType[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapred.join.Parser.TType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.Parser.TType -->
|
|
<!-- start interface org.apache.hadoop.mapred.join.ResetableIterator -->
|
|
<interface name="ResetableIterator" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/>
|
|
<doc>
|
|
<![CDATA[This defines an interface to a stateful Iterator that can replay elements
|
|
added to it directly.
|
|
Note that this does not extend {@link java.util.Iterator}.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.join.ResetableIterator -->
|
|
<!-- start class org.apache.hadoop.mapred.join.ResetableIterator.EMPTY -->
|
|
<class name="ResetableIterator.EMPTY" extends="org.apache.hadoop.mapreduce.lib.join.ResetableIterator.EMPTY"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
|
|
<constructor name="ResetableIterator.EMPTY"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.ResetableIterator.EMPTY -->
|
|
<!-- start class org.apache.hadoop.mapred.join.StreamBackedIterator -->
|
|
<class name="StreamBackedIterator" extends="org.apache.hadoop.mapreduce.lib.join.StreamBackedIterator"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
|
|
<constructor name="StreamBackedIterator"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[This class provides an implementation of ResetableIterator. This
|
|
implementation uses a byte array to store elements added to it.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.StreamBackedIterator -->
|
|
<!-- start class org.apache.hadoop.mapred.join.TupleWritable -->
|
|
<class name="TupleWritable" extends="org.apache.hadoop.mapreduce.lib.join.TupleWritable"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TupleWritable"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create an empty tuple with no allocated storage for writables.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TupleWritable" type="org.apache.hadoop.io.Writable[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Initialize tuple with storage; unknown whether any of them contain
|
|
"written" values.]]>
|
|
</doc>
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[Writable type storing multiple {@link org.apache.hadoop.io.Writable}s.
|
|
|
|
This is *not* a general-purpose tuple type. In almost all cases, users are
|
|
encouraged to implement their own serializable types, which can perform
|
|
better validation and provide more efficient encodings than this class is
|
|
capable of. TupleWritable relies on the join framework for type safety and
|
|
assumes its instances will rarely be persisted, assumptions not only
|
|
incompatible with, but contrary to the general case.
|
|
|
|
@see org.apache.hadoop.io.Writable]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.TupleWritable -->
|
|
<!-- start class org.apache.hadoop.mapred.join.WrappedRecordReader -->
|
|
<class name="WrappedRecordReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
|
|
<method name="id" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the key at the head of this RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="qkey" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Clone the key at the head of this RR into the object supplied.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return true if the RR- including the k,v pair stored in this object-
|
|
is not exhausted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="skip"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read the next k,v pair into the head of this object; return true iff
|
|
the RR and this are exhausted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/>
|
|
<param name="key" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add an iterator to the collector at the position occupied by this
|
|
RecordReader over the values in this stream paired with the key
|
|
provided (ie register a stream of values from this source matching K
|
|
with a collector).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="U"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Write key-value pair at the head of this stream to the objects provided;
|
|
get next key-value pair from proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createKey" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Request new key from proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValue" return="U"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Request new value from proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Request progress from proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Request position from proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Forward close request to proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
|
|
<doc>
|
|
<![CDATA[Implement Comparable contract (compare key at head of proxied RR
|
|
with that of another).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Return true iff compareTo(other) returns 0.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Proxy class for a RecordReader participating in the join framework.
|
|
This class keeps track of the "head" key-value pair for the
|
|
provided RecordReader and keeps a store of values matching a key when
|
|
this source is participating in a join.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.WrappedRecordReader -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred.lib">
|
|
<!-- start class org.apache.hadoop.mapred.lib.BinaryPartitioner -->
|
|
<class name="BinaryPartitioner" extends="org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Partitioner"/>
|
|
<constructor name="BinaryPartitioner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Partition {@link BinaryComparable} keys using a configurable part of
|
|
the bytes array returned by {@link BinaryComparable#getBytes()}.
|
|
|
|
@see org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.BinaryPartitioner -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.ChainMapper -->
|
|
<class name="ChainMapper" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<constructor name="ChainMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructor.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addMapper"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="klass" type="java.lang.Class"/>
|
|
<param name="inputKeyClass" type="java.lang.Class"/>
|
|
<param name="inputValueClass" type="java.lang.Class"/>
|
|
<param name="outputKeyClass" type="java.lang.Class"/>
|
|
<param name="outputValueClass" type="java.lang.Class"/>
|
|
<param name="byValue" type="boolean"/>
|
|
<param name="mapperConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Adds a Mapper class to the chain job's JobConf.
|
|
<p/>
|
|
It has to be specified how key and values are passed from one element of
|
|
the chain to the next, by value or by reference. If a Mapper leverages the
|
|
assumed semantics that the key and values are not modified by the collector
|
|
'by value' must be used. If the Mapper does not expect this semantics, as
|
|
an optimization to avoid serialization and deserialization 'by reference'
|
|
can be used.
|
|
<p/>
|
|
For the added Mapper the configuration given for it,
|
|
<code>mapperConf</code>, has precedence over the job's JobConf. This
|
|
precedence is in effect when the task is running.
|
|
<p/>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainMapper, this is done by the addMapper for the last mapper in the chain
|
|
<p/>
|
|
|
|
@param job job's JobConf to add the Mapper class.
|
|
@param klass the Mapper class to add.
|
|
@param inputKeyClass mapper input key class.
|
|
@param inputValueClass mapper input value class.
|
|
@param outputKeyClass mapper output key class.
|
|
@param outputValueClass mapper output value class.
|
|
@param byValue indicates if key/values should be passed by value
|
|
to the next Mapper in the chain, if any.
|
|
@param mapperConf a JobConf with the configuration for the Mapper
|
|
class. It is recommended to use a JobConf without default values using the
|
|
<code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Configures the ChainMapper and all the Mappers in the chain.
|
|
<p/>
|
|
If this method is overridden <code>super.configure(...)</code> should be
|
|
invoked at the beginning of the overriding method.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Chains the <code>map(...)</code> methods of the Mappers in the chain.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Closes the ChainMapper and all the Mappers in the chain.
|
|
<p/>
|
|
If this method is overridden <code>super.close()</code> should be
|
|
invoked at the end of the overriding method.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The ChainMapper class allows the use of multiple Mapper classes within a single
|
|
Map task.
|
|
<p/>
|
|
The Mapper classes are invoked in a chained (or piped) fashion, the output of
|
|
the first becomes the input of the second, and so on until the last Mapper,
|
|
the output of the last Mapper will be written to the task's output.
|
|
<p/>
|
|
The key functionality of this feature is that the Mappers in the chain do not
|
|
need to be aware that they are executed in a chain. This enables having
|
|
reusable specialized Mappers that can be combined to perform composite
|
|
operations within a single task.
|
|
<p/>
|
|
Special care has to be taken when creating chains that the key/values output
|
|
by a Mapper are valid for the following Mapper in the chain. It is assumed
|
|
all Mappers and the Reduce in the chain use matching output and input key and
|
|
value classes as no conversion is done by the chaining code.
|
|
<p/>
|
|
Using the ChainMapper and the ChainReducer classes it is possible to compose
|
|
Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An
|
|
immediate benefit of this pattern is a dramatic reduction in disk IO.
|
|
<p/>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainMapper, this is done by the addMapper for the last mapper in the chain.
|
|
<p/>
|
|
ChainMapper usage pattern:
|
|
<p/>
|
|
<pre>
|
|
...
|
|
conf.setJobName("chain");
|
|
conf.setInputFormat(TextInputFormat.class);
|
|
conf.setOutputFormat(TextOutputFormat.class);
|
|
<p/>
|
|
JobConf mapAConf = new JobConf(false);
|
|
...
|
|
ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class,
|
|
Text.class, Text.class, true, mapAConf);
|
|
<p/>
|
|
JobConf mapBConf = new JobConf(false);
|
|
...
|
|
ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class,
|
|
LongWritable.class, Text.class, false, mapBConf);
|
|
<p/>
|
|
JobConf reduceConf = new JobConf(false);
|
|
...
|
|
ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class,
|
|
Text.class, Text.class, true, reduceConf);
|
|
<p/>
|
|
ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class,
|
|
LongWritable.class, Text.class, false, null);
|
|
<p/>
|
|
ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class,
|
|
LongWritable.class, LongWritable.class, true, null);
|
|
<p/>
|
|
FileInputFormat.setInputPaths(conf, inDir);
|
|
FileOutputFormat.setOutputPath(conf, outDir);
|
|
...
|
|
<p/>
|
|
JobClient jc = new JobClient(conf);
|
|
RunningJob job = jc.submitJob(conf);
|
|
...
|
|
</pre>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.ChainMapper -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.ChainReducer -->
|
|
<class name="ChainReducer" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Reducer"/>
|
|
<constructor name="ChainReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructor.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="setReducer"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="klass" type="java.lang.Class"/>
|
|
<param name="inputKeyClass" type="java.lang.Class"/>
|
|
<param name="inputValueClass" type="java.lang.Class"/>
|
|
<param name="outputKeyClass" type="java.lang.Class"/>
|
|
<param name="outputValueClass" type="java.lang.Class"/>
|
|
<param name="byValue" type="boolean"/>
|
|
<param name="reducerConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Sets the Reducer class to the chain job's JobConf.
|
|
<p/>
|
|
It has to be specified how key and values are passed from one element of
|
|
the chain to the next, by value or by reference. If a Reducer leverages the
|
|
assumed semantics that the key and values are not modified by the collector
|
|
'by value' must be used. If the Reducer does not expect this semantics, as
|
|
an optimization to avoid serialization and deserialization 'by reference'
|
|
can be used.
|
|
<p/>
|
|
For the added Reducer the configuration given for it,
|
|
<code>reducerConf</code>, has precedence over the job's JobConf. This
|
|
precedence is in effect when the task is running.
|
|
<p/>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainReducer, this is done by the setReducer or the addMapper for the last
|
|
element in the chain.
|
|
|
|
@param job job's JobConf to add the Reducer class.
|
|
@param klass the Reducer class to add.
|
|
@param inputKeyClass reducer input key class.
|
|
@param inputValueClass reducer input value class.
|
|
@param outputKeyClass reducer output key class.
|
|
@param outputValueClass reducer output value class.
|
|
@param byValue indicates if key/values should be passed by value
|
|
to the next Mapper in the chain, if any.
|
|
@param reducerConf a JobConf with the configuration for the Reducer
|
|
class. It is recommended to use a JobConf without default values using the
|
|
<code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addMapper"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="klass" type="java.lang.Class"/>
|
|
<param name="inputKeyClass" type="java.lang.Class"/>
|
|
<param name="inputValueClass" type="java.lang.Class"/>
|
|
<param name="outputKeyClass" type="java.lang.Class"/>
|
|
<param name="outputValueClass" type="java.lang.Class"/>
|
|
<param name="byValue" type="boolean"/>
|
|
<param name="mapperConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Adds a Mapper class to the chain job's JobConf.
|
|
<p/>
|
|
It has to be specified how key and values are passed from one element of
|
|
the chain to the next, by value or by reference. If a Mapper leverages the
|
|
assumed semantics that the key and values are not modified by the collector
|
|
'by value' must be used. If the Mapper does not expect this semantics, as
|
|
an optimization to avoid serialization and deserialization 'by reference'
|
|
can be used.
|
|
<p/>
|
|
For the added Mapper the configuration given for it,
|
|
<code>mapperConf</code>, has precedence over the job's JobConf. This
|
|
precedence is in effect when the task is running.
|
|
<p/>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainMapper, this is done by the addMapper for the last mapper in the chain
|
|
.
|
|
|
|
@param job chain job's JobConf to add the Mapper class.
|
|
@param klass the Mapper class to add.
|
|
@param inputKeyClass mapper input key class.
|
|
@param inputValueClass mapper input value class.
|
|
@param outputKeyClass mapper output key class.
|
|
@param outputValueClass mapper output value class.
|
|
@param byValue indicates if key/values should be passed by value
|
|
to the next Mapper in the chain, if any.
|
|
@param mapperConf a JobConf with the configuration for the Mapper
|
|
class. It is recommended to use a JobConf without default values using the
|
|
<code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Configures the ChainReducer, the Reducer and all the Mappers in the chain.
|
|
<p/>
|
|
If this method is overridden <code>super.configure(...)</code> should be
|
|
invoked at the beginning of the overriding method.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="values" type="java.util.Iterator"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Chains the <code>reduce(...)</code> method of the Reducer with the
|
|
<code>map(...) </code> methods of the Mappers in the chain.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Closes the ChainReducer, the Reducer and all the Mappers in the chain.
|
|
<p/>
|
|
If this method is overridden <code>super.close()</code> should be
|
|
invoked at the end of the overriding method.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The ChainReducer class allows chaining multiple Mapper classes after a
|
|
Reducer within the Reducer task.
|
|
<p/>
|
|
For each record output by the Reducer, the Mapper classes are invoked in a
|
|
chained (or piped) fashion, the output of the first becomes the input of the
|
|
second, and so on until the last Mapper, the output of the last Mapper will
|
|
be written to the task's output.
|
|
<p/>
|
|
The key functionality of this feature is that the Mappers in the chain do not
|
|
need to be aware that they are executed after the Reducer or in a chain.
|
|
This enables having reusable specialized Mappers that can be combined to
|
|
perform composite operations within a single task.
|
|
<p/>
|
|
Special care has to be taken when creating chains that the key/values output
|
|
by a Mapper are valid for the following Mapper in the chain. It is assumed
|
|
all Mappers and the Reduce in the chain use matching output and input key and
|
|
value classes as no conversion is done by the chaining code.
|
|
<p/>
|
|
Using the ChainMapper and the ChainReducer classes it is possible to compose
|
|
Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An
|
|
immediate benefit of this pattern is a dramatic reduction in disk IO.
|
|
<p/>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainReducer, this is done by the setReducer or the addMapper for the last
|
|
element in the chain.
|
|
<p/>
|
|
ChainReducer usage pattern:
|
|
<p/>
|
|
<pre>
|
|
...
|
|
conf.setJobName("chain");
|
|
conf.setInputFormat(TextInputFormat.class);
|
|
conf.setOutputFormat(TextOutputFormat.class);
|
|
<p/>
|
|
JobConf mapAConf = new JobConf(false);
|
|
...
|
|
ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class,
|
|
Text.class, Text.class, true, mapAConf);
|
|
<p/>
|
|
JobConf mapBConf = new JobConf(false);
|
|
...
|
|
ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class,
|
|
LongWritable.class, Text.class, false, mapBConf);
|
|
<p/>
|
|
JobConf reduceConf = new JobConf(false);
|
|
...
|
|
ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class,
|
|
Text.class, Text.class, true, reduceConf);
|
|
<p/>
|
|
ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class,
|
|
LongWritable.class, Text.class, false, null);
|
|
<p/>
|
|
ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class,
|
|
LongWritable.class, LongWritable.class, true, null);
|
|
<p/>
|
|
FileInputFormat.setInputPaths(conf, inDir);
|
|
FileOutputFormat.setOutputPath(conf, outDir);
|
|
...
|
|
<p/>
|
|
JobClient jc = new JobClient(conf);
|
|
RunningJob job = jc.submitJob(conf);
|
|
...
|
|
</pre>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.ChainReducer -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.CombineFileInputFormat -->
|
|
<class name="CombineFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.InputFormat"/>
|
|
<constructor name="CombineFileInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="numSplits" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="createPool"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="Use {@link #createPool(List)}.">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="filters" type="java.util.List"/>
|
|
<doc>
|
|
<![CDATA[Create a new pool and add the filters to it.
|
|
A split cannot have files from different pools.
|
|
@deprecated Use {@link #createPool(List)}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createPool"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="Use {@link #createPool(PathFilter...)}.">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="filters" type="org.apache.hadoop.fs.PathFilter[]"/>
|
|
<doc>
|
|
<![CDATA[Create a new pool and add the filters to it.
|
|
A pathname can satisfy any one of the specified filters.
|
|
A split cannot have files from different pools.
|
|
@deprecated Use {@link #createPool(PathFilter...)}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This is not implemented yet.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An abstract {@link org.apache.hadoop.mapred.InputFormat} that returns {@link CombineFileSplit}'s
|
|
in {@link org.apache.hadoop.mapred.InputFormat#getSplits(JobConf, int)} method.
|
|
Splits are constructed from the files under the input paths.
|
|
A split cannot have files from different pools.
|
|
Each split returned may contain blocks from different files.
|
|
If a maxSplitSize is specified, then blocks on the same node are
|
|
combined to form a single split. Blocks that are left over are
|
|
then combined with other blocks in the same rack.
|
|
If maxSplitSize is not specified, then blocks from the same rack
|
|
are combined in a single split; no attempt is made to create
|
|
node-local splits.
|
|
If the maxSplitSize is equal to the block size, then this class
|
|
is similar to the default splitting behaviour in Hadoop: each
|
|
block is a locally processed split.
|
|
Subclasses implement {@link org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit, JobConf, Reporter)}
|
|
to construct <code>RecordReader</code>'s for <code>CombineFileSplit</code>'s.
|
|
@see CombineFileSplit]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.CombineFileInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.CombineFileRecordReader -->
|
|
<class name="CombineFileRecordReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<constructor name="CombineFileRecordReader" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.lib.CombineFileSplit, org.apache.hadoop.mapred.Reporter, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[A generic RecordReader that can hand out different recordReaders
|
|
for each chunk in the CombineFileSplit.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="createKey" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="createValue" return="V"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[return the amount of data processed]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[return progress based on the amount of data processed so far.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="initNextRecordReader" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the record reader for the next chunk in this CombineFileSplit.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="split" type="org.apache.hadoop.mapred.lib.CombineFileSplit"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="jc" type="org.apache.hadoop.mapred.JobConf"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="reporter" type="org.apache.hadoop.mapred.Reporter"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="rrClass" type="java.lang.Class"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="rrConstructor" type="java.lang.reflect.Constructor"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="fs" type="org.apache.hadoop.fs.FileSystem"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="idx" type="int"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="progress" type="long"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="curReader" type="org.apache.hadoop.mapred.RecordReader"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A generic RecordReader that can hand out different recordReaders
|
|
for each chunk in a {@link CombineFileSplit}.
|
|
A CombineFileSplit can combine data chunks from multiple files.
|
|
This class allows using different RecordReaders for processing
|
|
these data chunks from different files.
|
|
@see CombineFileSplit]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.CombineFileRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.CombineFileSplit -->
|
|
<class name="CombineFileSplit" extends="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.InputSplit"/>
|
|
<constructor name="CombineFileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[], long[], java.lang.String[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.lib.CombineFileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Copy constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getJob" return="org.apache.hadoop.mapred.JobConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.CombineFileSplit -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce -->
|
|
<class name="FieldSelectionMapReduce" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<implements name="org.apache.hadoop.mapred.Reducer"/>
|
|
<constructor name="FieldSelectionMapReduce"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="val" type="V"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[The identity function. Input key/value pair is written directly to output.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.Text"/>
|
|
<param name="values" type="java.util.Iterator"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class implements a mapper/reducer class that can be used to perform
|
|
field selections in a manner similar to unix cut. The input data is treated
|
|
as fields separated by a user specified separator (the default value is
|
|
"\t"). The user can specify a list of fields that form the map output keys,
|
|
and a list of fields that form the map output values. If the inputformat is
|
|
TextInputFormat, the mapper will ignore the key to the map function, and the
|
|
fields are from the value only. Otherwise, the fields are the union of those
|
|
from the key and those from the value.
|
|
|
|
The field separator is under attribute "mapreduce.fieldsel.data.field.separator"
|
|
|
|
The map output field list spec is under attribute
|
|
"mapreduce.fieldsel.map.output.key.value.fields.spec".
|
|
The value is expected to be like "keyFieldsSpec:valueFieldsSpec"
|
|
key/valueFieldsSpec are comma (,) separated field spec: fieldSpec,fieldSpec,fieldSpec ...
|
|
Each field spec can be a simple number (e.g. 5) specifying a specific field, or a range
|
|
(like 2-5) to specify a range of fields, or an open range (like 3-) specifying all
|
|
the fields starting from field 3. The open range field spec applies to value fields only.
|
|
They have no effect on the key fields.
|
|
|
|
Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields 4,3,0 and 1 for keys,
|
|
and use fields 6,5,1,2,3,7 and above for values.
|
|
|
|
The reduce output field list spec is under attribute
|
|
"mapreduce.fieldsel.reduce.output.key.value.fields.spec".
|
|
|
|
The reducer extracts output key/value pairs in a similar manner, except that
|
|
the key is never ignored.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.FilterOutputFormat -->
|
|
<class name="FilterOutputFormat" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
|
|
<constructor name="FilterOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="FilterOutputFormat" type="org.apache.hadoop.mapred.OutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a FilterOutputFormat based on the supplied output format.
|
|
@param out the underlying OutputFormat]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="baseOut" type="org.apache.hadoop.mapred.OutputFormat"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[FilterOutputFormat is a convenience class that wraps OutputFormat.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.FilterOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.FilterOutputFormat.FilterRecordWriter -->
|
|
<class name="FilterOutputFormat.FilterRecordWriter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordWriter"/>
|
|
<constructor name="FilterOutputFormat.FilterRecordWriter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<constructor name="FilterOutputFormat.FilterRecordWriter" type="org.apache.hadoop.mapred.RecordWriter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="rawWriter" type="org.apache.hadoop.mapred.RecordWriter"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[<code>FilterRecordWriter</code> is a convenience wrapper
|
|
class that implements {@link RecordWriter}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.FilterOutputFormat.FilterRecordWriter -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.HashPartitioner -->
|
|
<class name="HashPartitioner" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Partitioner"/>
|
|
<constructor name="HashPartitioner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="getPartition" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K2"/>
|
|
<param name="value" type="V2"/>
|
|
<param name="numReduceTasks" type="int"/>
|
|
<doc>
|
|
<![CDATA[Use {@link Object#hashCode()} to partition.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Partition keys by their {@link Object#hashCode()}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.HashPartitioner -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.IdentityMapper -->
|
|
<class name="IdentityMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<constructor name="IdentityMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="val" type="V"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[The identity function. Input key/value pair is written directly to
|
|
output.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Implements the identity function, mapping inputs directly to outputs.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.IdentityMapper -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.IdentityReducer -->
|
|
<class name="IdentityReducer" extends="org.apache.hadoop.mapred.MapReduceBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Reducer"/>
|
|
<constructor name="IdentityReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="values" type="java.util.Iterator"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Writes all keys and values directly to output.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Performs no reduction, writing all input values directly to the output.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.IdentityReducer -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.InputSampler -->
|
|
<class name="InputSampler" extends="org.apache.hadoop.mapreduce.lib.partition.InputSampler"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InputSampler" type="org.apache.hadoop.mapred.JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="writePartitionFile"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="sampler" type="org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.InputSampler -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.InverseMapper -->
|
|
<class name="InverseMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<constructor name="InverseMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[The inverse function. Input keys and values are swapped.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Mapper} that swaps keys and values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.InverseMapper -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.KeyFieldBasedComparator -->
|
|
<class name="KeyFieldBasedComparator" extends="org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<constructor name="KeyFieldBasedComparator"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This comparator implementation provides a subset of the features provided
|
|
by the Unix/GNU Sort. In particular, the supported features are:
|
|
-n, (Sort numerically)
|
|
-r, (Reverse the result of comparison)
|
|
-k pos1[,pos2], where pos is of the form f[.c][opts], where f is the number
|
|
of the field to use, and c is the number of the first character from the
|
|
beginning of the field. Fields and character posns are numbered starting
|
|
with 1; a character position of zero in pos2 indicates the field's last
|
|
character. If '.c' is omitted from pos1, it defaults to 1 (the beginning
|
|
of the field); if omitted from pos2, it defaults to 0 (the end of the
|
|
field). opts are ordering options (any of 'nr' as described above).
|
|
We assume that the fields in the key are separated by
|
|
{@link JobContext#MAP_OUTPUT_KEY_FIELD_SEPERATOR}]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.KeyFieldBasedComparator -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner -->
|
|
<class name="KeyFieldBasedPartitioner" extends="org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedPartitioner"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Partitioner"/>
|
|
<constructor name="KeyFieldBasedPartitioner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Defines a way to partition keys based on certain key fields (also see
|
|
{@link KeyFieldBasedComparator}).
|
|
The key specification supported is of the form -k pos1[,pos2], where,
|
|
pos is of the form f[.c][opts], where f is the number
|
|
of the key field to use, and c is the number of the first character from
|
|
the beginning of the field. Fields and character posns are numbered
|
|
starting with 1; a character position of zero in pos2 indicates the
|
|
field's last character. If '.c' is omitted from pos1, it defaults to 1
|
|
(the beginning of the field); if omitted from pos2, it defaults to 0
|
|
(the end of the field).]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.LazyOutputFormat -->
|
|
<class name="LazyOutputFormat" extends="org.apache.hadoop.mapred.lib.FilterOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="LazyOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setOutputFormatClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the underlying output format for LazyOutputFormat.
|
|
@param job the {@link JobConf} to modify
|
|
@param theClass the underlying class]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A convenience class that creates output lazily.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.LazyOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.LongSumReducer -->
|
|
<class name="LongSumReducer" extends="org.apache.hadoop.mapred.MapReduceBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Reducer"/>
|
|
<constructor name="LongSumReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="values" type="java.util.Iterator"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Reducer} that sums long values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.LongSumReducer -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.MultipleInputs -->
|
|
<class name="MultipleInputs" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultipleInputs"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="addInputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="inputFormatClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Add a {@link Path} with a custom {@link InputFormat} to the list of
|
|
inputs for the map-reduce job.
|
|
|
|
@param conf The configuration of the job
|
|
@param path {@link Path} to be added to the list of inputs for the job
|
|
@param inputFormatClass {@link InputFormat} class to use for this path]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addInputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="inputFormatClass" type="java.lang.Class"/>
|
|
<param name="mapperClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Add a {@link Path} with a custom {@link InputFormat} and
|
|
{@link Mapper} to the list of inputs for the map-reduce job.
|
|
|
|
@param conf The configuration of the job
|
|
@param path {@link Path} to be added to the list of inputs for the job
|
|
@param inputFormatClass {@link InputFormat} class to use for this path
|
|
@param mapperClass {@link Mapper} class to use for this path]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class supports MapReduce jobs that have multiple input paths with
|
|
a different {@link InputFormat} and {@link Mapper} for each path]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.MultipleInputs -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.MultipleOutputFormat -->
|
|
<class name="MultipleOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultipleOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create a composite record writer that can write key/value data to different
|
|
output files
|
|
|
|
@param fs
|
|
the file system to use
|
|
@param job
|
|
the job conf for the job
|
|
@param name
|
|
the leaf file name for the output file (such as "part-00000")
|
|
@param arg3
|
|
a progressable for reporting progress.
|
|
@return a composite record writer
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateLeafFileName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Generate the leaf name for the output file name. The default behavior does
|
|
not change the leaf file name (such as part-00000)
|
|
|
|
@param name
|
|
the leaf file name for the output file
|
|
@return the given leaf file name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateFileNameForKeyValue" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Generate the output file name based on the given key and the leaf file
|
|
name. The default behavior is that the file name does not depend on the
|
|
key.
|
|
|
|
@param key
|
|
the key of the output data
|
|
@param name
|
|
the leaf file name
|
|
@return generated file name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateActualKey" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<doc>
|
|
<![CDATA[Generate the actual key from the given key/value. The default behavior is that
|
|
the actual key is equal to the given key
|
|
|
|
@param key
|
|
the key of the output data
|
|
@param value
|
|
the value of the output data
|
|
@return the actual key derived from the given key/value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateActualValue" return="V"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<doc>
|
|
<![CDATA[Generate the actual value from the given key and value. The default behavior is that
|
|
the actual value is equal to the given value
|
|
|
|
@param key
|
|
the key of the output data
|
|
@param value
|
|
the value of the output data
|
|
@return the actual value derived from the given key/value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputFileBasedOutputFileName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Generate the outfile name based on a given name and the input file name. If
|
|
the {@link JobContext#MAP_INPUT_FILE} does not exist (i.e. this is not for a map only job),
|
|
the given name is returned unchanged. If the config value for
|
|
"num.of.trailing.legs.to.use" is not set, or set 0 or negative, the given
|
|
name is returned unchanged. Otherwise, return a file name consisting of the
|
|
N trailing legs of the input file name where N is the config value for
|
|
"num.of.trailing.legs.to.use".
|
|
|
|
@param job
|
|
the job config
|
|
@param name
|
|
the output file name
|
|
@return the outfile name based on a given name and the input file name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@param fs
|
|
the file system to use
|
|
@param job
|
|
a job conf object
|
|
@param name
|
|
the name of the file over which a record writer object will be
|
|
constructed
|
|
@param arg3
|
|
a progressable object
|
|
@return A RecordWriter object over the given file
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This abstract class extends the FileOutputFormat, allowing to write the
|
|
output data to different output files. There are three basic use cases for
|
|
this class.
|
|
|
|
Case one: This class is used for a map reduce job with at least one reducer.
|
|
The reducer wants to write data to different files depending on the actual
|
|
keys. It is assumed that a key (or value) encodes the actual key (value)
|
|
and the desired location for the actual key (value).
|
|
|
|
Case two: This class is used for a map only job. The job wants to use an
|
|
output file name that is either a part of the input file name of the input
|
|
data, or some derivation of it.
|
|
|
|
Case three: This class is used for a map only job. The job wants to use an
|
|
output file name that depends on both the keys and the input file name.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.MultipleOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.MultipleOutputs -->
|
|
<class name="MultipleOutputs" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultipleOutputs" type="org.apache.hadoop.mapred.JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Creates and initializes multiple named outputs support, it should be
|
|
instantiated in the Mapper/Reducer configure method.
|
|
|
|
@param job the job configuration object]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getNamedOutputsList" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Returns list of channel names.
|
|
|
|
@param conf job conf
|
|
@return List of channel Names]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isMultiNamedOutput" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Returns if a named output is multiple.
|
|
|
|
@param conf job conf
|
|
@param namedOutput named output
|
|
@return <code>true</code> if the named output is multi, <code>false</code>
|
|
if it is single. If the named output is not defined it returns
|
|
<code>false</code>]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNamedOutputFormatClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Returns the named output OutputFormat.
|
|
|
|
@param conf job conf
|
|
@param namedOutput named output
|
|
@return namedOutput OutputFormat]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNamedOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Returns the key class for a named output.
|
|
|
|
@param conf job conf
|
|
@param namedOutput named output
|
|
@return class for the named output key]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNamedOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Returns the value class for a named output.
|
|
|
|
@param conf job conf
|
|
@param namedOutput named output
|
|
@return class of named output value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addNamedOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<param name="outputFormatClass" type="java.lang.Class"/>
|
|
<param name="keyClass" type="java.lang.Class"/>
|
|
<param name="valueClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Adds a named output for the job.
|
|
<p/>
|
|
|
|
@param conf job conf to add the named output
|
|
@param namedOutput named output name, it has to be a word, letters
|
|
and numbers only, cannot be the word 'part' as
|
|
that is reserved for the
|
|
default output.
|
|
@param outputFormatClass OutputFormat class.
|
|
@param keyClass key class
|
|
@param valueClass value class]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addMultiNamedOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<param name="outputFormatClass" type="java.lang.Class"/>
|
|
<param name="keyClass" type="java.lang.Class"/>
|
|
<param name="valueClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Adds a multi named output for the job.
|
|
<p/>
|
|
|
|
@param conf job conf to add the named output
|
|
@param namedOutput named output name, it has to be a word, letters
|
|
and numbers only, cannot be the word 'part' as
|
|
that is reserved for the
|
|
default output.
|
|
@param outputFormatClass OutputFormat class.
|
|
@param keyClass key class
|
|
@param valueClass value class]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setCountersEnabled"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="enabled" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Enables or disables counters for the named outputs.
|
|
<p/>
|
|
By default these counters are disabled.
|
|
<p/>
|
|
MultipleOutputs supports counters; by default they are disabled.
|
|
The counters group is the {@link MultipleOutputs} class name.
|
|
<p/>
|
|
The names of the counters are the same as the named outputs. For multi
|
|
named outputs the name of the counter is the concatenation of the named
|
|
output, an underscore '_' and the multiname.
|
|
|
|
@param conf job conf in which the counters are enabled or disabled.
|
|
@param enabled indicates if the counters will be enabled or not.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCountersEnabled" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Returns if the counters for the named outputs are enabled or not.
|
|
<p/>
|
|
By default these counters are disabled.
|
|
<p/>
|
|
MultipleOutputs supports counters; by default they are disabled.
|
|
The counters group is the {@link MultipleOutputs} class name.
|
|
<p/>
|
|
The names of the counters are the same as the named outputs. For multi
|
|
named outputs the name of the counter is the concatenation of the named
|
|
output, an underscore '_' and the multiname.
|
|
|
|
|
|
@param conf job conf to check for the counters setting.
|
|
@return TRUE if the counters are enabled, FALSE if they are disabled.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNamedOutputs" return="java.util.Iterator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns iterator with the defined named outputs.
|
|
|
|
@return iterator with the defined named outputs]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCollector" return="org.apache.hadoop.mapred.OutputCollector"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets the output collector for a named output.
|
|
<p/>
|
|
|
|
@param namedOutput the named output name
|
|
@param reporter the reporter
|
|
@return the output collector for the given named output
|
|
@throws IOException thrown if output collector could not be created]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCollector" return="org.apache.hadoop.mapred.OutputCollector"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<param name="multiName" type="java.lang.String"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets the output collector for a multi named output.
|
|
<p/>
|
|
|
|
@param namedOutput the named output name
|
|
@param multiName the multi name part
|
|
@param reporter the reporter
|
|
@return the output collector for the given named output
|
|
@throws IOException thrown if output collector could not be created]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Closes all the opened named outputs.
|
|
<p/>
|
|
If overridden, subclasses must invoke <code>super.close()</code> at the
|
|
end of their <code>close()</code>
|
|
|
|
@throws java.io.IOException thrown if any of the MultipleOutput files
|
|
could not be closed properly.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The MultipleOutputs class simplifies writing to additional outputs other
|
|
than the job default output via the <code>OutputCollector</code> passed to
|
|
the <code>map()</code> and <code>reduce()</code> methods of the
|
|
<code>Mapper</code> and <code>Reducer</code> implementations.
|
|
<p/>
|
|
Each additional output, or named output, may be configured with its own
|
|
<code>OutputFormat</code>, with its own key class and with its own value
|
|
class.
|
|
<p/>
|
|
A named output can be a single file or a multi file. The latter is referred to as
|
|
a multi named output.
|
|
<p/>
|
|
A multi named output is an unbounded set of files all sharing the same
|
|
<code>OutputFormat</code>, key class and value class configuration.
|
|
<p/>
|
|
When named outputs are used within a <code>Mapper</code> implementation,
|
|
key/values written to a named output are not part of the reduce phase, only
|
|
key/values written to the job <code>OutputCollector</code> are part of the
|
|
reduce phase.
|
|
<p/>
|
|
MultipleOutputs supports counters; by default they are disabled. The counters
|
|
group is the {@link MultipleOutputs} class name.
|
|
<p/>
|
|
The names of the counters are the same as the named outputs. For multi
|
|
named outputs the name of the counter is the concatenation of the named
|
|
output, an underscore '_' and the multiname.
|
|
<p/>
|
|
Job configuration usage pattern is:
|
|
<pre>
|
|
|
|
JobConf conf = new JobConf();
|
|
|
|
conf.setInputPath(inDir);
|
|
FileOutputFormat.setOutputPath(conf, outDir);
|
|
|
|
conf.setMapperClass(MOMap.class);
|
|
conf.setReducerClass(MOReduce.class);
|
|
...
|
|
|
|
// Defines additional single text based output 'text' for the job
|
|
MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
|
|
LongWritable.class, Text.class);
|
|
|
|
// Defines additional multi sequencefile based output 'sequence' for the
|
|
// job
|
|
MultipleOutputs.addMultiNamedOutput(conf, "seq",
|
|
SequenceFileOutputFormat.class,
|
|
LongWritable.class, Text.class);
|
|
...
|
|
|
|
JobClient jc = new JobClient();
|
|
RunningJob job = jc.submitJob(conf);
|
|
|
|
...
|
|
</pre>
|
|
<p/>
|
|
Usage pattern in the Reducer implementation is:
|
|
<pre>
|
|
|
|
public class MOReduce implements
|
|
Reducer<WritableComparable, Writable> {
|
|
private MultipleOutputs mos;
|
|
|
|
public void configure(JobConf conf) {
|
|
...
|
|
mos = new MultipleOutputs(conf);
|
|
}
|
|
|
|
public void reduce(WritableComparable key, Iterator<Writable> values,
|
|
OutputCollector output, Reporter reporter)
|
|
throws IOException {
|
|
...
|
|
mos.getCollector("text", reporter).collect(key, new Text("Hello"));
|
|
mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye"));
|
|
mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau"));
|
|
...
|
|
}
|
|
|
|
public void close() throws IOException {
|
|
mos.close();
|
|
...
|
|
}
|
|
|
|
}
|
|
</pre>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.MultipleOutputs -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat -->
|
|
<class name="MultipleSequenceFileOutputFormat" extends="org.apache.hadoop.mapred.lib.MultipleOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultipleSequenceFileOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class extends the MultipleOutputFormat, allowing to write the output data
|
|
to different output files in sequence file output format.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.MultipleTextOutputFormat -->
|
|
<class name="MultipleTextOutputFormat" extends="org.apache.hadoop.mapred.lib.MultipleOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultipleTextOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class extends the MultipleOutputFormat, allowing to write the output
|
|
data to different output files in Text output format.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.MultipleTextOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.MultithreadedMapRunner -->
|
|
<class name="MultithreadedMapRunner" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.MapRunnable"/>
|
|
<constructor name="MultithreadedMapRunner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="input" type="org.apache.hadoop.mapred.RecordReader"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Multithreaded implementation for {@link org.apache.hadoop.mapred.MapRunnable}.
|
|
<p>
|
|
It can be used instead of the default implementation,
|
|
{@link org.apache.hadoop.mapred.MapRunner}, when the Map operation is not CPU
|
|
bound in order to improve throughput.
|
|
<p>
|
|
Map implementations using this MapRunnable must be thread-safe.
|
|
<p>
|
|
The Map-Reduce job has to be configured to use this MapRunnable class (using
|
|
the JobConf.setMapRunnerClass method) and
|
|
the number of threads the thread-pool can use with the
|
|
<code>mapred.map.multithreadedrunner.threads</code> property, its default
|
|
value is 10 threads.
|
|
<p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.MultithreadedMapRunner -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.NLineInputFormat -->
|
|
<class name="NLineInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<constructor name="NLineInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="numSplits" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Logically splits the set of input files for the job, splits N lines
|
|
of the input as one split.
|
|
|
|
@see org.apache.hadoop.mapred.FileInputFormat#getSplits(JobConf, int)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[NLineInputFormat which splits N lines of input as one split.
|
|
|
|
In many "pleasantly" parallel applications, each process/mapper
|
|
processes the same input file(s), but the computations are
|
|
controlled by different parameters (referred to as "parameter sweeps").
|
|
One way to achieve this, is to specify a set of parameters
|
|
(one set per line) as input in a control file
|
|
(which is the input path to the map-reduce application,
|
|
whereas the input dataset is specified
|
|
via a config variable in JobConf).
|
|
|
|
The NLineInputFormat can be used in such applications; it splits
|
|
the input file such that by default, one line is fed as
|
|
a value to one map task, and key is the offset.
|
|
i.e. (k,v) is (LongWritable, Text).
|
|
The location hints will span the whole mapred cluster.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.NLineInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.NullOutputFormat -->
|
|
<class name="NullOutputFormat" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
|
|
<constructor name="NullOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Consume all outputs and put them in /dev/null.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.NullOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.RegexMapper -->
|
|
<class name="RegexMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<constructor name="RegexMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Mapper} that extracts text matching a regular expression.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.RegexMapper -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.TokenCountMapper -->
|
|
<class name="TokenCountMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<constructor name="TokenCountMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Mapper} that maps text values into <token,freq> pairs. Uses
|
|
{@link StringTokenizer} to break text into tokens.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.TokenCountMapper -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.TotalOrderPartitioner -->
|
|
<class name="TotalOrderPartitioner" extends="org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Partitioner"/>
|
|
<constructor name="TotalOrderPartitioner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Partitioner effecting a total order by reading split points from
|
|
an externally generated source.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.TotalOrderPartitioner -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred.lib.aggregate">
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.DoubleValueSum -->
|
|
<class name="DoubleValueSum" extends="org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="DoubleValueSum"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that sums up a sequence of double
|
|
values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.DoubleValueSum -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueMax -->
|
|
<class name="LongValueMax" extends="org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="LongValueMax"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that maintains the maximum of
|
|
a sequence of long values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueMax -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueMin -->
|
|
<class name="LongValueMin" extends="org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="LongValueMin"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that maintains the minimum of
|
|
a sequence of long values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueMin -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueSum -->
|
|
<class name="LongValueSum" extends="org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="LongValueSum"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that sums up
|
|
a sequence of long values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueSum -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.StringValueMax -->
|
|
<class name="StringValueMax" extends="org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="StringValueMax"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that maintains the biggest of
|
|
a sequence of strings.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.StringValueMax -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.StringValueMin -->
|
|
<class name="StringValueMin" extends="org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="StringValueMin"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that maintains the smallest of
|
|
a sequence of strings.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.StringValueMin -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.UniqValueCount -->
|
|
<class name="UniqValueCount" extends="org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="UniqValueCount"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[the default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="UniqValueCount" type="long"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[constructor
|
|
@param maxNum the limit in the number of unique values to keep.]]>
|
|
</doc>
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that dedupes a sequence of objects.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.UniqValueCount -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.UserDefinedValueAggregatorDescriptor -->
|
|
<class name="UserDefinedValueAggregatorDescriptor" extends="org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor"/>
|
|
<constructor name="UserDefinedValueAggregatorDescriptor" type="java.lang.String, org.apache.hadoop.mapred.JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@param className the class name of the user defined descriptor class
|
|
@param job a configuration object used for descriptor configuration]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="createInstance" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="className" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Create an instance of the given class
|
|
@param className the name of the class
|
|
@return a dynamically created instance of the given class]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Do nothing.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a wrapper for a user defined value aggregator descriptor.
|
|
It serves two functions: One is to create an object of ValueAggregatorDescriptor from the
|
|
name of a user defined class that may be dynamically loaded. The other is to
|
|
delegate invocations of the generateKeyValPairs function to the created object.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.UserDefinedValueAggregatorDescriptor -->
|
|
<!-- start interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregator -->
|
|
<interface name="ValueAggregator" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
|
|
<doc>
|
|
<![CDATA[This interface defines the minimal protocol for value aggregators.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregator -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor -->
|
|
<class name="ValueAggregatorBaseDescriptor" extends="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor"/>
|
|
<constructor name="ValueAggregatorBaseDescriptor"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="generateEntry" return="java.util.Map.Entry"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="type" type="java.lang.String"/>
|
|
<param name="id" type="java.lang.String"/>
|
|
<param name="val" type="org.apache.hadoop.io.Text"/>
|
|
<doc>
|
|
<![CDATA[@param type the aggregation type
|
|
@param id the aggregation id
|
|
@param val the val associated with the id to be aggregated
|
|
@return an Entry whose key is the aggregation id prefixed with
|
|
the aggregation type.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateValueAggregator" return="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="type" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[@param type the aggregation type
|
|
@return a value aggregator of the given type.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[get the input file name.
|
|
|
|
@param job a job configuration object]]>
|
|
</doc>
|
|
</method>
|
|
<field name="UNIQ_VALUE_COUNT" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="LONG_VALUE_SUM" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="DOUBLE_VALUE_SUM" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="VALUE_HISTOGRAM" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="LONG_VALUE_MAX" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="LONG_VALUE_MIN" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="STRING_VALUE_MAX" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="STRING_VALUE_MIN" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class implements the common functionalities of
|
|
the subclasses of ValueAggregatorDescriptor class.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorCombiner -->
|
|
<class name="ValueAggregatorCombiner" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ValueAggregatorCombiner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Combiner does not need to configure.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.Text"/>
|
|
<param name="values" type="java.util.Iterator"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Combines values for a given key.
|
|
@param key the key is expected to be a Text object, whose prefix indicates
|
|
the type of aggregation to aggregate the values.
|
|
@param values the values to combine
|
|
@param output to collect combined values]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Do nothing.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="arg0" type="K1"/>
|
|
<param name="arg1" type="V1"/>
|
|
<param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="arg3" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Do nothing. Should not be called.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements the generic combiner of Aggregate.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorCombiner -->
|
|
<!-- start interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor -->
|
|
<interface name="ValueAggregatorDescriptor" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"/>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Configure the object
|
|
|
|
@param job
|
|
a JobConf object that may contain the information that can be used
|
|
to configure the object.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="TYPE_SEPARATOR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="ONE" type="org.apache.hadoop.io.Text"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This interface defines the contract a value aggregator descriptor must
|
|
support. Such a descriptor can be configured with a JobConf object. Its main
|
|
function is to generate a list of aggregation-id/value pairs. An aggregation
|
|
id encodes an aggregation type which is used to guide the way to aggregate
|
|
the value in the reduce/combiner phase of an Aggregate based job. The mapper in
|
|
an Aggregate based map/reduce job may create one or more of
|
|
ValueAggregatorDescriptor objects at configuration time. For each input
|
|
key/value pair, the mapper will use those objects to create aggregation
|
|
id/value pairs.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob -->
|
|
<class name="ValueAggregatorJob" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ValueAggregatorJob"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="createValueAggregatorJobs" return="org.apache.hadoop.mapred.jobcontrol.JobControl"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<param name="descriptors" type="java.lang.Class[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="createValueAggregatorJobs" return="org.apache.hadoop.mapred.jobcontrol.JobControl"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create an Aggregate based map/reduce job.
|
|
|
|
@param args the arguments used for job creation. Generic hadoop
|
|
arguments are accepted.
|
|
@return a JobConf object ready for submission.
|
|
|
|
@throws IOException
|
|
@see GenericOptionsParser]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<param name="descriptors" type="java.lang.Class[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setAggregatorDescriptors"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="descriptors" type="java.lang.Class[]"/>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[create and run an Aggregate based map/reduce job.
|
|
|
|
@param args the arguments used for job creation
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This is the main class for creating a map/reduce job using Aggregate
|
|
framework. The Aggregate is a specialization of map/reduce framework,
|
|
specializing for performing various simple aggregations.
|
|
|
|
Generally speaking, in order to implement an application using Map/Reduce
|
|
model, the developer is to implement Map and Reduce functions (and possibly
|
|
combine function). However, a lot of applications related to counting and
|
|
statistics computing have very similar characteristics. Aggregate abstracts
|
|
out the general patterns of these functions and implements those patterns.
|
|
In particular, the package provides generic mapper/reducer/combiner classes,
|
|
and a set of built-in value aggregators, and a generic utility class that
|
|
helps user create map/reduce jobs using the generic class. The built-in
|
|
aggregators include:
|
|
|
|
sum over numeric values count the number of distinct values compute the
|
|
histogram of values compute the minimum, maximum, median, average, standard
|
|
deviation of numeric values
|
|
|
|
The developer using Aggregate will need only to provide a plugin class
|
|
conforming to the following interface:
|
|
|
|
public interface ValueAggregatorDescriptor { public ArrayList<Entry>
|
|
generateKeyValPairs(Object key, Object value); public void
|
|
configure(JobConf job); }
|
|
|
|
The package also provides a base class, ValueAggregatorBaseDescriptor,
|
|
implementing the above interface. The user can extend the base class and
|
|
implement generateKeyValPairs accordingly.
|
|
|
|
The primary work of generateKeyValPairs is to emit one or more key/value
|
|
pairs based on the input key/value pair. The key in an output key/value pair
|
|
encodes two pieces of information: aggregation type and aggregation id. The
|
|
value will be aggregated onto the aggregation id according to the aggregation
|
|
type.
|
|
|
|
This class offers a function to generate a map/reduce job using Aggregate
|
|
framework. The function takes the following parameters: input directory spec
|
|
input format (text or sequence file) output directory a file specifying the
|
|
user plugin class]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase -->
|
|
<class name="ValueAggregatorJobBase" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<implements name="org.apache.hadoop.mapred.Reducer"/>
|
|
<constructor name="ValueAggregatorJobBase"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="logSpec"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="aggregatorDescriptorList" type="java.util.ArrayList"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This abstract class implements some common functionalities of the
|
|
generic mapper, reducer and combiner classes of Aggregate.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorMapper -->
|
|
<class name="ValueAggregatorMapper" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ValueAggregatorMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K1"/>
|
|
<param name="value" type="V1"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[the map function. It iterates through the value aggregator descriptor
|
|
list to generate aggregation id/value pairs and emit them.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="arg0" type="org.apache.hadoop.io.Text"/>
|
|
<param name="arg1" type="java.util.Iterator"/>
|
|
<param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="arg3" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Do nothing. Should not be called.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements the generic mapper of Aggregate.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorMapper -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorReducer -->
|
|
<class name="ValueAggregatorReducer" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ValueAggregatorReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.Text"/>
|
|
<param name="values" type="java.util.Iterator"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@param key
|
|
the key is expected to be a Text object, whose prefix indicates
|
|
the type of aggregation to aggregate the values. In effect, data
|
|
driven computing is achieved. It is assumed that each aggregator's
|
|
getReport method emits appropriate output for the aggregator. This
|
|
may be further customized.
|
|
@param values the values to be aggregated]]>
|
|
</doc>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="arg0" type="K1"/>
|
|
<param name="arg1" type="V1"/>
|
|
<param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="arg3" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Do nothing. Should not be called.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements the generic reducer of Aggregate.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorReducer -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueHistogram -->
|
|
<class name="ValueHistogram" extends="org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="ValueHistogram"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that computes the
|
|
histogram of a sequence of strings.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueHistogram -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred.lib.db">
|
|
<!-- start class org.apache.hadoop.mapred.lib.db.DBConfiguration -->
|
|
<class name="DBConfiguration" extends="org.apache.hadoop.mapreduce.lib.db.DBConfiguration"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="configureDB"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="driverClass" type="java.lang.String"/>
|
|
<param name="dbUrl" type="java.lang.String"/>
|
|
<param name="userName" type="java.lang.String"/>
|
|
<param name="passwd" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Sets the DB access related fields in the JobConf.
|
|
@param job the job
|
|
@param driverClass JDBC Driver class name
|
|
@param dbUrl JDBC DB access URL.
|
|
@param userName DB access username
|
|
@param passwd DB access passwd]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configureDB"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="driverClass" type="java.lang.String"/>
|
|
<param name="dbUrl" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Sets the DB access related fields in the JobConf.
|
|
@param job the job
|
|
@param driverClass JDBC Driver class name
|
|
@param dbUrl JDBC DB access URL.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="DRIVER_CLASS_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The JDBC Driver class name]]>
|
|
</doc>
|
|
</field>
|
|
<field name="URL_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[JDBC Database access URL]]>
|
|
</doc>
|
|
</field>
|
|
<field name="USERNAME_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[User name to access the database]]>
|
|
</doc>
|
|
</field>
|
|
<field name="PASSWORD_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Password to access the database]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_TABLE_NAME_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Input table name]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_FIELD_NAMES_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Field names in the Input table]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_CONDITIONS_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[WHERE clause in the input SELECT statement]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_ORDER_BY_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[ORDER BY clause in the input SELECT statement]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_QUERY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Whole input query, excluding LIMIT...OFFSET]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_COUNT_QUERY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Input query to get the count of records]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_CLASS_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Class name implementing DBWritable which will hold input tuples]]>
|
|
</doc>
|
|
</field>
|
|
<field name="OUTPUT_TABLE_NAME_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Output table name]]>
|
|
</doc>
|
|
</field>
|
|
<field name="OUTPUT_FIELD_NAMES_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Field names in the Output table]]>
|
|
</doc>
|
|
</field>
|
|
<field name="OUTPUT_FIELD_COUNT_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Number of fields in the Output table]]>
|
|
</doc>
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.db.DBConfiguration -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat -->
|
|
<class name="DBInputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DBInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.InputFormat"/>
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<constructor name="DBInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="chunks" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="inputClass" type="java.lang.Class"/>
|
|
<param name="tableName" type="java.lang.String"/>
|
|
<param name="conditions" type="java.lang.String"/>
|
|
<param name="orderBy" type="java.lang.String"/>
|
|
<param name="fieldNames" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[Initializes the map-part of the job with the appropriate input settings.
|
|
|
|
@param job The job
|
|
@param inputClass the class object implementing DBWritable, which is the
|
|
Java object holding tuple fields.
|
|
@param tableName The table to read data from
|
|
@param conditions The condition which to select data with, eg. '(updated >
|
|
20070101 AND length > 0)'
|
|
@param orderBy the fieldNames in the orderBy clause.
|
|
@param fieldNames The field names in the table
|
|
@see #setInput(JobConf, Class, String, String)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="inputClass" type="java.lang.Class"/>
|
|
<param name="inputQuery" type="java.lang.String"/>
|
|
<param name="inputCountQuery" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Initializes the map-part of the job with the appropriate input settings.
|
|
|
|
@param job The job
|
|
@param inputClass the class object implementing DBWritable, which is the
|
|
Java object holding tuple fields.
|
|
@param inputQuery the input query to select fields. Example :
|
|
"SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
|
|
@param inputCountQuery the input query that returns the number of records in
|
|
the table.
|
|
Example : "SELECT COUNT(f1) FROM Mytable"
|
|
@see #setInput(JobConf, Class, String, String, String, String...)]]>
|
|
</doc>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit -->
|
|
<class name="DBInputFormat.DBInputSplit" extends="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"
|
|
abstract="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.InputSplit"/>
|
|
<constructor name="DBInputFormat.DBInputSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Default Constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="DBInputFormat.DBInputSplit" type="long, long"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Convenience Constructor
|
|
@param start the index of the first row to select
|
|
@param end the index of the last row to select]]>
|
|
</doc>
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[An InputSplit that spans a set of rows]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBRecordReader -->
|
|
<class name="DBInputFormat.DBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DBRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<constructor name="DBInputFormat.DBRecordReader" type="org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.mapred.JobConf, java.sql.Connection, org.apache.hadoop.mapred.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
<doc>
|
|
<![CDATA[@param split The InputSplit to read data for
|
|
@throws SQLException]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="createKey" return="org.apache.hadoop.io.LongWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValue" return="T"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.LongWritable"/>
|
|
<param name="value" type="T"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A RecordReader that reads records from a SQL table.
|
|
Emits LongWritables containing the record number as
|
|
key and DBWritables as value.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat.NullDBWritable -->
|
|
<class name="DBInputFormat.NullDBWritable" extends="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.NullDBWritable"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.db.DBWritable"/>
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="DBInputFormat.NullDBWritable"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[A Class that does nothing, implementing DBWritable]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat.NullDBWritable -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.db.DBOutputFormat -->
|
|
<class name="DBOutputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DBOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
|
|
<constructor name="DBOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="filesystem" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="filesystem" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="tableName" type="java.lang.String"/>
|
|
<param name="fieldNames" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[Initializes the reduce-part of the job with the appropriate output settings
|
|
|
|
@param job The job
|
|
@param tableName The table to insert data into
|
|
@param fieldNames The field names in the table.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="tableName" type="java.lang.String"/>
|
|
<param name="fieldCount" type="int"/>
|
|
<doc>
|
|
<![CDATA[Initializes the reduce-part of the job with the appropriate output settings
|
|
|
|
@param job The job
|
|
@param tableName The table to insert data into
|
|
@param fieldCount the number of fields in the table.]]>
|
|
</doc>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.db.DBOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.db.DBOutputFormat.DBRecordWriter -->
|
|
<class name="DBOutputFormat.DBRecordWriter" extends="org.apache.hadoop.mapreduce.lib.db.DBOutputFormat.DBRecordWriter"
|
|
abstract="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordWriter"/>
|
|
<constructor name="DBOutputFormat.DBRecordWriter" type="java.sql.Connection, java.sql.PreparedStatement"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</constructor>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A RecordWriter that writes the reduce output to a SQL table]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.db.DBOutputFormat.DBRecordWriter -->
|
|
<!-- start interface org.apache.hadoop.mapred.lib.db.DBWritable -->
|
|
<interface name="DBWritable" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.db.DBWritable"/>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.lib.db.DBWritable -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred.pipes">
|
|
<!-- start class org.apache.hadoop.mapred.pipes.Submitter -->
|
|
<class name="Submitter" extends="org.apache.hadoop.conf.Configured"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="Submitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="Submitter" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getExecutable" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the URI of the application's executable.
|
|
@param conf
|
|
@return the URI where the application's executable is located]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setExecutable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="executable" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the URI for the application's executable. Normally this is an hdfs:
|
|
location.
|
|
@param conf
|
|
@param executable The URI of the application's executable.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setIsJavaRecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="value" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the job is using a Java RecordReader.
|
|
@param conf the configuration to modify
|
|
@param value the new value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getIsJavaRecordReader" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Check whether the job is using a Java RecordReader
|
|
@param conf the configuration to check
|
|
@return is it a Java RecordReader?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setIsJavaMapper"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="value" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the Mapper is written in Java.
|
|
@param conf the configuration to modify
|
|
@param value the new value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getIsJavaMapper" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Check whether the job is using a Java Mapper.
|
|
@param conf the configuration to check
|
|
@return is it a Java Mapper?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setIsJavaReducer"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="value" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the Reducer is written in Java.
|
|
@param conf the configuration to modify
|
|
@param value the new value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getIsJavaReducer" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Check whether the job is using a Java Reducer.
|
|
@param conf the configuration to check
|
|
@return is it a Java Reducer?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setIsJavaRecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="value" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the job will use a Java RecordWriter.
|
|
@param conf the configuration to modify
|
|
@param value the new value to set]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getIsJavaRecordWriter" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Will the reduce use a Java RecordWriter?
|
|
@param conf the configuration to check
|
|
@return true, if the output of the job will be written by Java]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getKeepCommandFile" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Does the user want to keep the command file for debugging? If this is
|
|
true, pipes will write a copy of the command data to a file in the
|
|
task directory named "downlink.data", which may be used to run the C++
|
|
program under the debugger. You probably also want to set
|
|
JobConf.setKeepFailedTaskFiles(true) to keep the entire directory from
|
|
being deleted.
|
|
To run using the data file, set the environment variable
|
|
"mapreduce.pipes.commandfile" to point to the file.
|
|
@param conf the configuration to check
|
|
@return will the framework save the command file?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setKeepCommandFile"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="keep" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether to keep the command file for debugging
|
|
@param conf the configuration to modify
|
|
@param keep the new value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="submitJob" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="Use {@link Submitter#runJob(JobConf)}">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Submit a job to the map/reduce cluster. All of the necessary modifications
|
|
to the job to run under pipes are made to the configuration.
|
|
@param conf the job to submit to the cluster (MODIFIED)
|
|
@throws IOException
|
|
@deprecated Use {@link Submitter#runJob(JobConf)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="runJob" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Submit a job to the map/reduce cluster. All of the necessary modifications
|
|
to the job to run under pipes are made to the configuration.
|
|
@param conf the job to submit to the cluster (MODIFIED)
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="jobSubmit" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Submit a job to the Map-Reduce framework.
|
|
This returns a handle to the {@link RunningJob} which can be used to track
|
|
the running-job.
|
|
|
|
@param conf the job configuration.
|
|
@return a handle to the {@link RunningJob} which can be used to track the
|
|
running-job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
<doc>
|
|
<![CDATA[Submit a pipes job based on the command line arguments.
|
|
@param args]]>
|
|
</doc>
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="PRESERVE_COMMANDFILE" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="EXECUTABLE" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="INTERPRETOR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="IS_JAVA_MAP" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="IS_JAVA_RR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="IS_JAVA_RW" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="IS_JAVA_REDUCE" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="PARTITIONER" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="INPUT_FORMAT" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="PORT" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[The main entry point and job submitter. It may be used either from the
|
|
command line or through the API to launch Pipes jobs.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.pipes.Submitter -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred.tools">
|
|
<!-- start class org.apache.hadoop.mapred.tools.MRAdmin -->
|
|
<class name="MRAdmin" extends="org.apache.hadoop.conf.Configured"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="MRAdmin"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="MRAdmin" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Administrative access to Hadoop Map-Reduce.
|
|
|
|
Currently it only provides the ability to connect to the {@link JobTracker}
|
|
and 1) refresh the service-level authorization policy, 2) refresh queue acl
|
|
properties.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.tools.MRAdmin -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce">
|
|
<!-- start class org.apache.hadoop.mapreduce.Cluster -->
|
|
<class name="Cluster" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Cluster" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<constructor name="Cluster" type="java.net.InetSocketAddress, org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close the <code>Cluster</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getFileSystem" return="org.apache.hadoop.fs.FileSystem"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the file system where job-specific files are stored
|
|
|
|
@return object of FileSystem
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJob" return="org.apache.hadoop.mapreduce.Job"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapreduce.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get job corresponding to jobid.
|
|
|
|
@param jobId
|
|
@return object of {@link Job}
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getQueues" return="org.apache.hadoop.mapreduce.QueueInfo[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get all the queues in cluster.
|
|
|
|
@return array of {@link QueueInfo}
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getQueue" return="org.apache.hadoop.mapreduce.QueueInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get queue information for the specified name.
|
|
|
|
@param name queuename
|
|
@return object of {@link QueueInfo}
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getClusterStatus" return="org.apache.hadoop.mapreduce.ClusterMetrics"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get current cluster status.
|
|
|
|
@return object of {@link ClusterMetrics}
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getActiveTaskTrackers" return="org.apache.hadoop.mapreduce.TaskTrackerInfo[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get all active trackers in the cluster.
|
|
|
|
@return array of {@link TaskTrackerInfo}
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getBlackListedTaskTrackers" return="org.apache.hadoop.mapreduce.TaskTrackerInfo[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get blacklisted trackers.
|
|
|
|
@return array of {@link TaskTrackerInfo}
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getAllJobs" return="org.apache.hadoop.mapreduce.Job[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get all the jobs in cluster.
|
|
|
|
@return array of {@link Job}
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSystemDir" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Grab the jobtracker system directory path where
|
|
job-specific files will be placed.
|
|
|
|
@return the system directory where job-specific files are to be placed.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getStagingAreaDir" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Grab the jobtracker's view of the staging directory path where
|
|
job-specific files will be placed.
|
|
|
|
@return the staging directory where job-specific files are to be placed.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobHistoryUrl" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapreduce.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the job history file path for a given job id. The job history file at
|
|
this path may or may not exist, depending on the job completion state.
|
|
The file is present only for the completed jobs.
|
|
@param jobId the JobID of the job submitted by the current user.
|
|
@return the file path of the job history file
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getQueueAclsForCurrentUser" return="org.apache.hadoop.mapreduce.QueueAclsInfo[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Gets the Queue ACLs for the current user
|
|
@return array of QueueAclsInfo object for current user.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRootQueues" return="org.apache.hadoop.mapreduce.QueueInfo[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Gets the root level queues.
|
|
@return array of {@link QueueInfo} objects.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getChildQueues" return="org.apache.hadoop.mapreduce.QueueInfo[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="queueName" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Returns immediate children of queueName.
|
|
@param queueName
|
|
@return array of {@link QueueInfo} which are children of queueName
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobTrackerState" return="org.apache.hadoop.mapreduce.server.jobtracker.State"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get JobTracker's state
|
|
|
|
@return {@link State} of the JobTracker
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskTrackerExpiryInterval" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the tasktracker expiry interval for the cluster
|
|
@return the expiry interval in msec]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDelegationToken" return="org.apache.hadoop.security.token.Token"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="renewer" type="org.apache.hadoop.io.Text"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get a delegation token for the user from the JobTracker.
|
|
@param renewer the user who can renew the token
|
|
@return the new token
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="renewDelegationToken" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="token" type="org.apache.hadoop.security.token.Token"/>
|
|
<exception name="SecretManager.InvalidToken" type="org.apache.hadoop.security.token.SecretManager.InvalidToken"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Renew a delegation token
|
|
@param token the token to renew
|
|
@return the new expiration time
|
|
@throws InvalidToken
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cancelDelegationToken"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="token" type="org.apache.hadoop.security.token.Token"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Cancel a delegation token from the JobTracker
|
|
@param token the token to cancel
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Provides a way to access information about the map/reduce cluster.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Cluster -->
|
|
<!-- start class org.apache.hadoop.mapreduce.ClusterMetrics -->
|
|
<class name="ClusterMetrics" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="ClusterMetrics"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="ClusterMetrics" type="int, int, int, int, int, int, int, int, int, int, int, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRunningMaps" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of running map tasks in the cluster.
|
|
|
|
@return running maps]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRunningReduces" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of running reduce tasks in the cluster.
|
|
|
|
@return running reduces]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOccupiedMapSlots" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get number of occupied map slots in the cluster.
|
|
|
|
@return occupied map slot count]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOccupiedReduceSlots" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of occupied reduce slots in the cluster.
|
|
|
|
@return occupied reduce slot count]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReservedMapSlots" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get number of reserved map slots in the cluster.
|
|
|
|
@return reserved map slot count]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReservedReduceSlots" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of reserved reduce slots in the cluster.
|
|
|
|
@return reserved reduce slot count]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapSlotCapacity" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the total number of map slots in the cluster.
|
|
|
|
@return map slot capacity]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReduceSlotCapacity" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the total number of reduce slots in the cluster.
|
|
|
|
@return reduce slot capacity]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTotalJobSubmissions" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the total number of job submissions in the cluster.
|
|
|
|
@return total number of job submissions]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskTrackerCount" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of active trackers in the cluster.
|
|
|
|
@return active tracker count.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getBlackListedTaskTrackerCount" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of blacklisted trackers in the cluster.
|
|
|
|
@return blacklisted tracker count]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDecommissionedTaskTrackerCount" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of decommissioned trackers in the cluster.
|
|
|
|
@return decommissioned tracker count]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Status information on the current state of the Map-Reduce cluster.
|
|
|
|
<p><code>ClusterMetrics</code> provides clients with information such as:
|
|
<ol>
|
|
<li>
|
|
Size of the cluster.
|
|
</li>
|
|
<li>
|
|
Number of blacklisted and decommissioned trackers.
|
|
</li>
|
|
<li>
|
|
Slot capacity of the cluster.
|
|
</li>
|
|
<li>
|
|
The number of currently occupied/reserved map and reduce slots.
|
|
</li>
|
|
<li>
|
|
The number of currently running map and reduce tasks.
|
|
</li>
|
|
<li>
|
|
The number of job submissions.
|
|
</li>
|
|
</ol></p>
|
|
|
|
<p>Clients can query for the latest <code>ClusterMetrics</code>, via
|
|
{@link Cluster#getClusterStatus()}.</p>
|
|
|
|
@see Cluster]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.ClusterMetrics -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Counter -->
|
|
<class name="Counter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="Counter"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="Counter" type="java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="Counter" type="java.lang.String, java.lang.String, long"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a counter.
|
|
@param name the name within the group's enum.
|
|
@param displayName a name to be displayed.
|
|
@param value the counter value.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="setDisplayName"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="displayName" type="java.lang.String"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read the binary representation of the counter]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Write the binary representation of the counter]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getDisplayName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the display name of the counter.
|
|
@return the user facing name of the counter]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getValue" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[What is the current value of this counter?
|
|
@return the current value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="increment"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="incr" type="long"/>
|
|
<doc>
|
|
<![CDATA[Increment this counter by the given value
|
|
@param incr the value to increase this counter by]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericRight" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A named counter that tracks the progress of a map/reduce job.
|
|
|
|
<p><code>Counters</code> represent global counters, defined either by the
|
|
Map-Reduce framework or applications. Each <code>Counter</code> is named by
|
|
an {@link Enum} and has a long for the value.</p>
|
|
|
|
<p><code>Counters</code> are bunched into Groups, each comprising
|
|
counters from a particular <code>Enum</code> class.</p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Counter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.CounterGroup -->
|
|
<class name="CounterGroup" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<implements name="java.lang.Iterable"/>
|
|
<constructor name="CounterGroup" type="java.lang.String"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="CounterGroup" type="java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a CounterGroup.
|
|
@param name the name of the group's enum.
|
|
@param displayName a name to be displayed for the group.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the internal name of the group
|
|
@return the internal name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDisplayName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the display name of the group.
|
|
@return the human readable name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addCounter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="counter" type="org.apache.hadoop.mapreduce.Counter"/>
|
|
<doc>
|
|
<![CDATA[Add a counter to this group.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="findCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="counterName" type="java.lang.String"/>
|
|
<param name="displayName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Find a counter in a group.
|
|
@param counterName the name of the counter
|
|
@param displayName the display name of the counter
|
|
@return the counter that was found or added]]>
|
|
</doc>
|
|
</method>
|
|
<method name="findCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="counterName" type="java.lang.String"/>
|
|
</method>
|
|
<method name="iterator" return="java.util.Iterator"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="size" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the number of counters in this group.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericRight" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="incrAllCounters"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="rightGroup" type="org.apache.hadoop.mapreduce.CounterGroup"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A group of {@link Counter}s that logically belong together. Typically,
|
|
it is an {@link Enum} subclass and the counters are the values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.CounterGroup -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Counters -->
|
|
<class name="Counters" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<implements name="java.lang.Iterable"/>
|
|
<constructor name="Counters"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="Counters" type="org.apache.hadoop.mapred.Counters"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Utility constructor to create a Counters object from the
|
|
org.apache.hadoop.mapred counters
|
|
@param counters]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addGroup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="group" type="org.apache.hadoop.mapreduce.CounterGroup"/>
|
|
<doc>
|
|
<![CDATA[Add a group.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="findCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="groupName" type="java.lang.String"/>
|
|
<param name="counterName" type="java.lang.String"/>
|
|
</method>
|
|
<method name="findCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Enum"/>
|
|
<doc>
|
|
<![CDATA[Find the counter for the given enum. The same enum will always return the
|
|
same counter.
|
|
@param key the counter key
|
|
@return the matching counter object]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getGroupNames" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the names of all counter classes.
|
|
@return the collection of counter group names.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="iterator" return="java.util.Iterator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getGroup" return="org.apache.hadoop.mapreduce.CounterGroup"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="groupName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Returns the named counter group, or an empty group if there is none
|
|
with the specified name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="countCounters" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the total number of counters, by summing the number of counters
|
|
in each group.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Write the set of groups.
|
|
The external format is:
|
|
#groups (groupName group)*
|
|
|
|
i.e. the number of groups followed by 0 or more groups, where each
|
|
group is of the form:
|
|
|
|
groupDisplayName #counters (false | true counter)*
|
|
|
|
where each counter is of the form:
|
|
|
|
name (false | true displayName) value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read a set of groups.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return textual representation of the counter values.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="incrAllCounters"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="org.apache.hadoop.mapreduce.Counters"/>
|
|
<doc>
|
|
<![CDATA[Increments multiple counters by their amounts in another Counters
|
|
instance.
|
|
@param other the other Counters instance]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericRight" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Counters -->
|
|
<!-- start class org.apache.hadoop.mapreduce.ID -->
|
|
<class name="ID" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.WritableComparable"/>
|
|
<constructor name="ID" type="int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[constructs an ID object from the given int]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="ID"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getId" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[returns the int which represents the identifier]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="that" type="org.apache.hadoop.mapreduce.ID"/>
|
|
<doc>
|
|
<![CDATA[Compare IDs by associated numbers]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="SEPARATOR" type="char"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="id" type="int"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A general identifier, which internally stores the id
|
|
as an integer. This is the super class of {@link JobID},
|
|
{@link TaskID} and {@link TaskAttemptID}.
|
|
|
|
@see JobID
|
|
@see TaskID
|
|
@see TaskAttemptID]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.ID -->
|
|
<!-- start class org.apache.hadoop.mapreduce.InputFormat -->
|
|
<class name="InputFormat" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getSplits" return="java.util.List"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Logically split the set of input files for the job.
|
|
|
|
<p>Each {@link InputSplit} is then assigned to an individual {@link Mapper}
|
|
for processing.</p>
|
|
|
|
<p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the
|
|
input files are not physically split into chunks. For example, a split could
|
|
be an <i>&lt;input-file-path, start, offset&gt;</i> tuple. The InputFormat
|
|
also creates the {@link RecordReader} to read the {@link InputSplit}.
|
|
|
|
@param context job configuration.
|
|
@return a list of {@link InputSplit}s for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Create a record reader for a given split. The framework will call
|
|
{@link RecordReader#initialize(InputSplit, TaskAttemptContext)} before
|
|
the split is used.
|
|
@param split the split to be read
|
|
@param context the information about the task
|
|
@return a new record reader
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>InputFormat</code> describes the input-specification for a
|
|
Map-Reduce job.
|
|
|
|
<p>The Map-Reduce framework relies on the <code>InputFormat</code> of the
|
|
job to:</p>
|
|
<ol>
|
|
<li>
|
|
Validate the input-specification of the job.
</li>
|
|
<li>
|
|
Split-up the input file(s) into logical {@link InputSplit}s, each of
|
|
which is then assigned to an individual {@link Mapper}.
|
|
</li>
|
|
<li>
|
|
Provide the {@link RecordReader} implementation to be used to glean
|
|
input records from the logical <code>InputSplit</code> for processing by
|
|
the {@link Mapper}.
|
|
</li>
|
|
</ol>
|
|
|
|
<p>The default behavior of file-based {@link InputFormat}s, typically
|
|
sub-classes of {@link FileInputFormat}, is to split the
|
|
input into <i>logical</i> {@link InputSplit}s based on the total size, in
|
|
bytes, of the input files. However, the {@link FileSystem} blocksize of
|
|
the input files is treated as an upper bound for input splits. A lower bound
|
|
on the split size can be set via
|
|
<a href="{@docRoot}/../mapred-default.html#mapreduce.input.fileinputformat.split.minsize">
|
|
mapreduce.input.fileinputformat.split.minsize</a>.</p>
|
|
|
|
<p>Clearly, logical splits based on input-size are insufficient for many
|
|
applications since record boundaries are to be respected. In such cases, the
|
|
application has to also implement a {@link RecordReader} on whom lies the
|
|
responsibility to respect record-boundaries and present a record-oriented
|
|
view of the logical <code>InputSplit</code> to the individual task.</p>
|
|
|
|
@see InputSplit
|
|
@see RecordReader
|
|
@see FileInputFormat]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.InputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.InputSplit -->
|
|
<class name="InputSplit" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InputSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getLength" return="long"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the size of the split, so that the input splits can be sorted by size.
|
|
@return the number of bytes in the split
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the list of nodes by name where the data for the split would be local.
|
|
The locations do not need to be serialized.
|
|
@return a new array of the node names.
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>InputSplit</code> represents the data to be processed by an
|
|
individual {@link Mapper}.
|
|
|
|
<p>Typically, it presents a byte-oriented view on the input, and it is the
|
|
responsibility of the {@link RecordReader} of the job to process this and present
|
|
a record-oriented view.
|
|
|
|
@see InputFormat
|
|
@see RecordReader]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.InputSplit -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Job -->
|
|
<class name="Job" extends="org.apache.hadoop.mapreduce.task.JobContextImpl"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<constructor name="Job"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<constructor name="Job" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<constructor name="Job" type="org.apache.hadoop.conf.Configuration, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="getInstance" return="org.apache.hadoop.mapreduce.Job"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cluster" type="org.apache.hadoop.mapreduce.Cluster"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getInstance" return="org.apache.hadoop.mapreduce.Job"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cluster" type="org.apache.hadoop.mapreduce.Cluster"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getInstance" return="org.apache.hadoop.mapreduce.Job"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cluster" type="org.apache.hadoop.mapreduce.Cluster"/>
|
|
<param name="status" type="org.apache.hadoop.mapreduce.JobStatus"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getStatus" return="org.apache.hadoop.mapreduce.JobStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getJobState" return="org.apache.hadoop.mapreduce.JobStatus.State"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Returns the current state of the Job.
|
|
|
|
@return JobStatus#State
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTrackingURL" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the URL where some job progress information will be displayed.
|
|
|
|
@return the URL where some job progress information will be displayed.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobFile" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the path of the submitted job configuration.
|
|
|
|
@return the path of the submitted job configuration.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getStartTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get start time of the job.
|
|
|
|
@return the start time of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getFinishTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get finish time of the job.
|
|
|
|
@return the finish time of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSchedulingInfo" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get scheduling info of the job.
|
|
|
|
@return the scheduling info of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPriority" return="org.apache.hadoop.mapreduce.JobPriority"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the priority of the job.
|
|
|
|
@return the priority of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The user-specified job name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getHistoryUrl" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="isRetired" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Dump stats to screen.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskReports" return="org.apache.hadoop.mapreduce.TaskReport[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="type" type="org.apache.hadoop.mapreduce.TaskType"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the information of the current state of the tasks of a job.
|
|
|
|
@param type Type of the task
|
|
@return the reports for all tasks of the given type.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="mapProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the <i>progress</i> of the job's map-tasks, as a float between 0.0
|
|
and 1.0. When all map tasks have completed, the function returns 1.0.
|
|
|
|
@return the progress of the job's map-tasks.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reduceProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the <i>progress</i> of the job's reduce-tasks, as a float between 0.0
|
|
and 1.0. When all reduce tasks have completed, the function returns 1.0.
|
|
|
|
@return the progress of the job's reduce-tasks.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanupProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the <i>progress</i> of the job's cleanup-tasks, as a float between 0.0
|
|
and 1.0. When all cleanup tasks have completed, the function returns 1.0.
|
|
|
|
@return the progress of the job's cleanup-tasks.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the <i>progress</i> of the job's setup-tasks, as a float between 0.0
|
|
and 1.0. When all setup tasks have completed, the function returns 1.0.
|
|
|
|
@return the progress of the job's setup-tasks.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isComplete" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Check if the job is finished or not.
|
|
This is a non-blocking call.
|
|
|
|
@return <code>true</code> if the job is complete, else <code>false</code>.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isSuccessful" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Check if the job completed successfully.
|
|
|
|
@return <code>true</code> if the job succeeded, else <code>false</code>.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="killJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Kill the running job. Blocks until all job tasks have been
|
|
killed as well. If the job is no longer running, it simply returns.
|
|
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setPriority"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="priority" type="org.apache.hadoop.mapreduce.JobPriority"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Set the priority of a running job.
|
|
@param priority the new priority for the job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskCompletionEvents" return="org.apache.hadoop.mapreduce.TaskCompletionEvent[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="startFrom" type="int"/>
|
|
<param name="numEvents" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get events indicating completion (success/failure) of component tasks.
|
|
|
|
@param startFrom index to start fetching events from
|
|
@param numEvents number of events to fetch
|
|
@return an array of {@link TaskCompletionEvent}s
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="killTask" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Kill indicated task attempt.
|
|
|
|
@param taskId the id of the task to be terminated.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="failTask" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Fail indicated task attempt.
|
|
|
|
@param taskId the id of the task to be terminated.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounters" return="org.apache.hadoop.mapreduce.Counters"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Gets the counters for this job. May return null if the job has been
|
|
retired and the job is no longer in the completed job store.
|
|
|
|
@return the counters for this job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskDiagnostics" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Gets the diagnostic messages for a given task attempt.
|
|
@param taskid
|
|
@return the list of diagnostic messages for the task
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setNumReduceTasks"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="tasks" type="int"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the number of reduce tasks for the job.
|
|
@param tasks the number of reduce tasks
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setWorkingDirectory"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="dir" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Set the current working directory for the default file system.
|
|
|
|
@param dir the new current working directory.
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInputFormatClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link InputFormat} for the job.
|
|
@param cls the <code>InputFormat</code> to use
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputFormatClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link OutputFormat} for the job.
|
|
@param cls the <code>OutputFormat</code> to use
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapperClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Mapper} for the job.
|
|
@param cls the <code>Mapper</code> to use
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJarByClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the Jar by finding where a given class came from.
|
|
@param cls the example class]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJar"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jar" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the job jar]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setUser"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="user" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the reported username for this job.
|
|
|
|
@param user the username for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setCombinerClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the combiner class for the job.
|
|
@param cls the combiner to use
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setReducerClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Reducer} for the job.
|
|
@param cls the <code>Reducer</code> to use
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setPartitionerClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Partitioner} for the job.
|
|
@param cls the <code>Partitioner</code> to use
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapOutputKeyClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the key class for the map output data. This allows the user to
|
|
specify the map output key class to be different than the final output
|
|
key class.
|
|
|
|
@param theClass the map output key class.
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapOutputValueClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the value class for the map output data. This allows the user to
|
|
specify the map output value class to be different than the final output
|
|
value class.
|
|
|
|
@param theClass the map output value class.
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputKeyClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the key class for the job output data.
|
|
|
|
@param theClass the key class for the job output data.
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputValueClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the value class for job outputs.
|
|
|
|
@param theClass the value class for job outputs.
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSortComparatorClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Define the comparator that controls how the keys are sorted before they
|
|
are passed to the {@link Reducer}.
|
|
@param cls the raw comparator
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setGroupingComparatorClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Define the comparator that controls which keys are grouped together
|
|
for a single call to
|
|
{@link Reducer#reduce(Object, Iterable,
|
|
org.apache.hadoop.mapreduce.Reducer.Context)}
|
|
@param cls the raw comparator to use
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the user-specified job name.
|
|
|
|
@param name the job's new name.
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSpeculativeExecution"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="speculativeExecution" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Turn speculative execution on or off for this job.
|
|
|
|
@param speculativeExecution <code>true</code> if speculative execution
|
|
should be turned on, else <code>false</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapSpeculativeExecution"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="speculativeExecution" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Turn speculative execution on or off for this job for map tasks.
|
|
|
|
@param speculativeExecution <code>true</code> if speculative execution
|
|
should be turned on for map tasks,
|
|
else <code>false</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setReduceSpeculativeExecution"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="speculativeExecution" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Turn speculative execution on or off for this job for reduce tasks.
|
|
|
|
@param speculativeExecution <code>true</code> if speculative execution
|
|
should be turned on for reduce tasks,
|
|
else <code>false</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobSetupCleanupNeeded"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="needed" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Specify whether job-setup and job-cleanup are needed for the job.
|
|
|
|
@param needed If <code>true</code>, job-setup and job-cleanup will be
|
|
considered from {@link OutputCommitter}
|
|
else ignored.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setCacheArchives"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="archives" type="java.net.URI[]"/>
|
|
<doc>
|
|
<![CDATA[Set the given set of archives
|
|
@param archives The list of archives that need to be localized]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setCacheFiles"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="files" type="java.net.URI[]"/>
|
|
<doc>
|
|
<![CDATA[Set the given set of files
|
|
@param files The list of files that need to be localized]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addCacheArchive"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="uri" type="java.net.URI"/>
|
|
<doc>
|
|
<![CDATA[Add an archive to be localized
|
|
@param uri The uri of the cache to be localized]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addCacheFile"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="uri" type="java.net.URI"/>
|
|
<doc>
|
|
<![CDATA[Add a file to be localized
|
|
@param uri The uri of the cache to be localized]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addFileToClassPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="file" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add a file path to the current set of classpath entries. It adds the file
|
|
to cache as well.
|
|
|
|
@param file Path of the file to be added]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addArchiveToClassPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="archive" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add an archive path to the current set of classpath entries. It adds the
|
|
archive to cache as well.
|
|
|
|
@param archive Path of the archive to be added]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createSymlink"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[This method allows you to create symlinks in the current working directory
|
|
of the task to all the cache files/archives]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxMapAttempts"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="n" type="int"/>
|
|
<doc>
|
|
<![CDATA[Expert: Set the number of maximum attempts that will be made to run a
|
|
map task.
|
|
|
|
@param n the number of attempts per map task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxReduceAttempts"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="n" type="int"/>
|
|
<doc>
|
|
<![CDATA[Expert: Set the number of maximum attempts that will be made to run a
|
|
reduce task.
|
|
|
|
@param n the number of attempts per reduce task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setProfileEnabled"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="newValue" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the system should collect profiler information for some of
|
|
the tasks in this job. The information is stored in the user log
|
|
directory.
|
|
@param newValue true means it should be gathered]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setProfileParams"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="value" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the profiler configuration arguments. If the string contains a '%s' it
|
|
will be replaced with the name of the profiling output file when the task
|
|
runs.
|
|
|
|
This value is passed to the task child JVM on the command line.
|
|
|
|
@param value the configuration string]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setProfileTaskRange"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="isMap" type="boolean"/>
|
|
<param name="newValue" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the ranges of maps or reduces to profile. setProfileEnabled(true)
|
|
must also be called.
|
|
@param newValue a set of integer ranges of the map ids]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setCancelDelegationTokenUponJobCompletion"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="value" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Sets the flag that will allow the JobTracker to cancel the HDFS delegation
|
|
tokens upon job completion. Defaults to true.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="submit"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Submit the job to the cluster and return immediately.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="waitForCompletion" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="verbose" type="boolean"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Submit the job to the cluster and wait for it to finish.
|
|
@param verbose print the progress to the user
|
|
@return true if the job succeeded
|
|
@throws IOException thrown if the communication with the
|
|
<code>JobTracker</code> is lost]]>
|
|
</doc>
|
|
</method>
|
|
<method name="monitorAndPrintJob" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Monitor a job and print status in real-time as progress is made and tasks
|
|
fail.
|
|
@return true if the job succeeded
|
|
@throws IOException if communication to the JobTracker fails]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgressPollInterval" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[The interval at which monitorAndPrintJob() prints status]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCompletionPollInterval" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[The interval at which waitForCompletion() should check.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskOutputFilter" return="org.apache.hadoop.mapreduce.Job.TaskStatusFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Get the task output filter.
|
|
|
|
@param conf the configuration.
|
|
@return the filter level.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskOutputFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="newValue" type="org.apache.hadoop.mapreduce.Job.TaskStatusFilter"/>
|
|
<doc>
|
|
<![CDATA[Modify the Configuration to set the task output filter.
|
|
|
|
@param conf the Configuration to modify.
|
|
@param newValue the value to set.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="OUTPUT_FILTER" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="COMPLETION_POLL_INTERVAL_KEY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Key in mapred-*.xml that sets completionPollIntervalMillis]]>
|
|
</doc>
|
|
</field>
|
|
<field name="PROGRESS_MONITOR_POLL_INTERVAL_KEY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Key in mapred-*.xml that sets progMonitorPollIntervalMillis]]>
|
|
</doc>
|
|
</field>
|
|
<field name="USED_GENERIC_PARSER" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="SUBMIT_REPLICATION" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[The job submitter's view of the Job.
|
|
|
|
<p>It allows the user to configure the
|
|
job, submit it, control its execution, and query the state. The set methods
|
|
only work until the job is submitted, afterwards they will throw an
|
|
IllegalStateException. </p>
|
|
|
|
<p>
|
|
Normally the user creates the application, describes various facets of the
|
|
job via {@link Job} and then submits the job and monitors its progress.</p>
|
|
|
|
<p>Here is an example on how to submit a job:</p>
|
|
<p><blockquote><pre>
|
|
// Create a new Job
|
|
Job job = new Job(new Configuration());
|
|
job.setJarByClass(MyJob.class);
|
|
|
|
// Specify various job-specific parameters
|
|
job.setJobName("myjob");
|
|
|
|
job.setInputPath(new Path("in"));
|
|
job.setOutputPath(new Path("out"));
|
|
|
|
job.setMapperClass(MyJob.MyMapper.class);
|
|
job.setReducerClass(MyJob.MyReducer.class);
|
|
|
|
// Submit the job, then poll for progress until the job is complete
|
|
job.waitForCompletion(true);
|
|
</pre></blockquote></p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Job -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Job.JobState -->
|
|
<class name="Job.JobState" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapreduce.Job.JobState[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapreduce.Job.JobState"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Job.JobState -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Job.TaskStatusFilter -->
|
|
<class name="Job.TaskStatusFilter" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapreduce.Job.TaskStatusFilter[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapreduce.Job.TaskStatusFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Job.TaskStatusFilter -->
|
|
<!-- start interface org.apache.hadoop.mapreduce.JobContext -->
|
|
<interface name="JobContext" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.MRJobConfig"/>
|
|
<method name="getConfiguration" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the configuration for the job.
|
|
@return the shared configuration object]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the unique ID for the job.
|
|
@return the object with the job id]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumReduceTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the configured number of reduce tasks for this job. Defaults to
|
|
<code>1</code>.
|
|
@return the number of reduce tasks for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the current working directory for the default file system.
|
|
|
|
@return the directory name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the key class for the job output data.
|
|
@return the key class for the job output data.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the value class for job outputs.
|
|
@return the value class for job outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the key class for the map output data. If it is not set, use the
|
|
(final) output key class. This allows the map output key class to be
|
|
different than the final output key class.
|
|
@return the map output key class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the value class for the map output data. If it is not set, use the
|
|
(final) output value class. This allows the map output value class to be
|
|
different than the final output value class.
|
|
|
|
@return the map output value class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user-specified job name. This is only used to identify the
|
|
job to the user.
|
|
|
|
@return the job's name, defaulting to "".]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputFormatClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link InputFormat} class for the job.
|
|
|
|
@return the {@link InputFormat} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapperClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Mapper} class for the job.
|
|
|
|
@return the {@link Mapper} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Get the combiner class for the job.
|
|
|
|
@return the combiner class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReducerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Reducer} class for the job.
|
|
|
|
@return the {@link Reducer} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputFormatClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link OutputFormat} class for the job.
|
|
|
|
@return the {@link OutputFormat} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPartitionerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Partitioner} class for the job.
|
|
|
|
@return the {@link Partitioner} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSortComparator" return="org.apache.hadoop.io.RawComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link RawComparator} comparator used to compare keys.
|
|
|
|
@return the {@link RawComparator} comparator used to compare keys.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJar" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the pathname of the job's jar.
|
|
@return the pathname]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getGroupingComparator" return="org.apache.hadoop.io.RawComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user defined {@link RawComparator} comparator for
|
|
grouping keys of inputs to the reduce.
|
|
|
|
@return comparator set by the user for grouping values.
|
|
@see Job#setGroupingComparatorClass(Class) for details.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobSetupCleanupNeeded" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get whether job-setup and job-cleanup are needed for the job
|
|
|
|
@return boolean]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProfileEnabled" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get whether the task profiling is enabled.
|
|
@return true if some tasks will be profiled]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProfileParams" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the profiler configuration arguments.
|
|
|
|
The default value for this property is
|
|
"-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s"
|
|
|
|
@return the parameters to pass to the task child to configure profiling]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="isMap" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Get the range of maps or reduces to profile.
|
|
@param isMap is the task a map?
|
|
@return the task ranges]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUser" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the reported username for this job.
|
|
|
|
@return the username]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSymlink" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[This method checks to see if symlinks are to be created for the
|
|
localized cache files in the current working directory
|
|
@return true if symlinks are to be created, false otherwise]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getArchiveClassPaths" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the archive entries in classpath as an array of Path]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCacheArchives" return="java.net.URI[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get cache archives set in the Configuration
|
|
@return A URI array of the caches set in the Configuration
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCacheFiles" return="java.net.URI[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get cache files set in the Configuration
|
|
@return A URI array of the files set in the Configuration
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocalCacheArchives" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Return the path array of the localized caches
|
|
@return A path array of localized caches
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocalCacheFiles" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Return the path array of the localized files
|
|
@return A path array of localized files
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getFileClassPaths" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the file entries in classpath as an array of Path]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getArchiveTimestamps" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the timestamps of the archives. Used by internal
|
|
DistributedCache and MapReduce code.
|
|
@return a string array of timestamps
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getFileTimestamps" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the timestamps of the files. Used by internal
|
|
DistributedCache and MapReduce code.
|
|
@return a string array of timestamps
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxMapAttempts" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the configured number of maximum attempts that will be made to run a
|
|
map task, as specified by the <code>mapred.map.max.attempts</code>
|
|
property. If this property is not already set, the default is 4 attempts.
|
|
|
|
@return the max number of attempts per map task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxReduceAttempts" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the configured number of maximum attempts that will be made to run a
|
|
reduce task, as specified by the <code>mapred.reduce.max.attempts</code>
|
|
property. If this property is not already set, the default is 4 attempts.
|
|
|
|
@return the max number of attempts per reduce task.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A read-only view of the job that is provided to the tasks while they
|
|
are running.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapreduce.JobContext -->
|
|
<!-- start class org.apache.hadoop.mapreduce.JobCounter -->
|
|
<class name="JobCounter" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapreduce.JobCounter[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapreduce.JobCounter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.JobCounter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.JobID -->
|
|
<class name="JobID" extends="org.apache.hadoop.mapred.ID"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.lang.Comparable"/>
|
|
<constructor name="JobID" type="java.lang.String, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a JobID object
|
|
@param jtIdentifier jobTracker identifier
|
|
@param id job number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobID"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getJtIdentifier" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="org.apache.hadoop.mapreduce.ID"/>
|
|
<doc>
|
|
<![CDATA[Compare JobIds by first jtIdentifiers, then by job numbers]]>
|
|
</doc>
|
|
</method>
|
|
<method name="appendTo" return="java.lang.StringBuilder"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="builder" type="java.lang.StringBuilder"/>
|
|
<doc>
|
|
<![CDATA[Add the stuff after the "job" prefix to the given builder. This is useful,
|
|
because the sub-ids use this substring at the start of their string.
|
|
@param builder the builder to append to
|
|
@return the builder that was passed in]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="forName" return="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="str" type="java.lang.String"/>
|
|
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
|
|
<doc>
|
|
<![CDATA[Construct a JobId object from given string
|
|
@return constructed JobId object or null if the given String is null
|
|
@throws IllegalArgumentException if the given string is malformed]]>
|
|
</doc>
|
|
</method>
|
|
<field name="JOB" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="idFormat" type="java.text.NumberFormat"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[JobID represents the immutable and unique identifier for
|
|
the job. JobID consists of two parts. First part
|
|
represents the jobtracker identifier, so that jobID to jobtracker map
|
|
is defined. For cluster setup this string is the jobtracker
|
|
start time, for local setting, it is "local".
|
|
Second part of the JobID is the job number. <br>
|
|
An example JobID is :
|
|
<code>job_200707121733_0003</code> , which represents the third job
|
|
running at the jobtracker started at <code>200707121733</code>.
|
|
<p>
|
|
Applications should never construct or parse JobID strings, but rather
|
|
use appropriate constructors or {@link #forName(String)} method.
|
|
|
|
@see TaskID
|
|
@see TaskAttemptID
|
|
@see org.apache.hadoop.mapred.JobTracker#getNewJobId()
|
|
@see org.apache.hadoop.mapred.JobTracker#getStartTime()]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.JobID -->
|
|
<!-- start class org.apache.hadoop.mapreduce.JobPriority -->
|
|
<class name="JobPriority" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapreduce.JobPriority[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapreduce.JobPriority"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Used to describe the priority of the running job.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.JobPriority -->
|
|
<!-- start class org.apache.hadoop.mapreduce.JobStatus -->
|
|
<class name="JobStatus" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<implements name="java.lang.Cloneable"/>
|
|
<constructor name="JobStatus"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="JobStatus" type="org.apache.hadoop.mapreduce.JobID, float, float, float, float, org.apache.hadoop.mapreduce.JobStatus.State, org.apache.hadoop.mapreduce.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a job status object for a given jobid.
|
|
@param jobid The jobid of the job
|
|
@param setupProgress The progress made on the setup
|
|
@param mapProgress The progress made on the maps
|
|
@param reduceProgress The progress made on the reduces
|
|
@param cleanupProgress The progress made on the cleanup
|
|
@param runState The current state of the job
|
|
@param jp Priority of the job.
|
|
@param user userid of the person who submitted the job.
|
|
@param jobName user-specified job name.
|
|
@param jobFile job configuration file.
|
|
@param trackingUrl link to the web-ui for details of the job.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="setMapProgress"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="p" type="float"/>
|
|
<doc>
|
|
<![CDATA[Sets the map progress of this job
|
|
@param p The value of map progress to set to]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setCleanupProgress"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="p" type="float"/>
|
|
<doc>
|
|
<![CDATA[Sets the cleanup progress of this job
|
|
@param p The value of cleanup progress to set to]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSetupProgress"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="p" type="float"/>
|
|
<doc>
|
|
<![CDATA[Sets the setup progress of this job
|
|
@param p The value of setup progress to set to]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setReduceProgress"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="p" type="float"/>
|
|
<doc>
|
|
<![CDATA[Sets the reduce progress of this Job
|
|
@param p The value of reduce progress to set to]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setPriority"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="jp" type="org.apache.hadoop.mapreduce.JobPriority"/>
|
|
<doc>
|
|
<![CDATA[Set the priority of the job, defaulting to NORMAL.
|
|
@param jp new job priority]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setFinishTime"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="finishTime" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the finish time of the job
|
|
@param finishTime The finishTime of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setHistoryFile"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="historyFile" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the job history file url for a completed job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTrackingUrl"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="trackingUrl" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the link to the web-ui for details of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setRetired"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Set the job retire flag to true.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setState"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="state" type="org.apache.hadoop.mapreduce.JobStatus.State"/>
|
|
<doc>
|
|
<![CDATA[Change the current run state of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setStartTime"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="startTime" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the start time of the job
|
|
@param startTime The startTime of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setUsername"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="userName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[@param userName The username of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSchedulingInfo"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="schedulingInfo" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Used to set the scheduling information associated with a particular Job.
|
|
|
|
@param schedulingInfo Scheduling information of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobACLs"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="acls" type="java.util.Map"/>
|
|
</method>
|
|
<method name="getMapProgress" return="float"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Percentage of progress in maps]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCleanupProgress" return="float"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Percentage of progress in cleanup]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSetupProgress" return="float"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Percentage of progress in setup]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReduceProgress" return="float"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Percentage of progress in reduce]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getState" return="org.apache.hadoop.mapreduce.JobStatus.State"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return running state of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getStartTime" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return start time of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="clone" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return The jobid of the Job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUsername" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the username of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSchedulingInfo" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Gets the scheduling information associated with a particular Job.
|
|
@return the scheduling information of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobACLs" return="java.util.Map"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getPriority" return="org.apache.hadoop.mapreduce.JobPriority"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the priority of the job
|
|
@return job priority]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isJobComplete" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns true if the status is for a completed job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getJobName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user-specified job name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobFile" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the configuration file for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTrackingUrl" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the link to the web-ui for details of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getFinishTime" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the finish time of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isRetired" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Check whether the job has retired.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getHistoryFile" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the job history file name for a completed job. If job is not
|
|
completed or history file not available then return null.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Describes the current status of a job.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.JobStatus -->
|
|
<!-- start class org.apache.hadoop.mapreduce.JobStatus.State -->
|
|
<class name="JobStatus.State" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapreduce.JobStatus.State[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapreduce.JobStatus.State"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getValue" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Current state of the job]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.JobStatus.State -->
|
|
<!-- start interface org.apache.hadoop.mapreduce.MapContext -->
|
|
<interface name="MapContext" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.TaskInputOutputContext"/>
|
|
<method name="getInputSplit" return="org.apache.hadoop.mapreduce.InputSplit"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the input split for this map.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The context that is given to the {@link Mapper}.
|
|
@param <KEYIN> the key input type to the Mapper
|
|
@param <VALUEIN> the value input type to the Mapper
|
|
@param <KEYOUT> the key output type from the Mapper
|
|
@param <VALUEOUT> the value output type from the Mapper]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapreduce.MapContext -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Mapper -->
|
|
<class name="Mapper" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Mapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Called once at the beginning of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="KEYIN"/>
|
|
<param name="value" type="VALUEIN"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Called once for each key/value pair in the input split. Most applications
|
|
should override this, but the default is the identity function.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Called once at the end of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Expert users can override this method for more complete control over the
|
|
execution of the Mapper.
|
|
@param context
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Maps input key/value pairs to a set of intermediate key/value pairs.
|
|
|
|
<p>Maps are the individual tasks which transform input records into
|
|
intermediate records. The transformed intermediate records need not be of
|
|
the same type as the input records. A given input pair may map to zero or
|
|
many output pairs.</p>
|
|
|
|
<p>The Hadoop Map-Reduce framework spawns one map task for each
|
|
{@link InputSplit} generated by the {@link InputFormat} for the job.
|
|
<code>Mapper</code> implementations can access the {@link Configuration} for
|
|
the job via the {@link JobContext#getConfiguration()}.</p>
|
|
|
|
<p>The framework first calls
|
|
{@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by
|
|
{@link #map(Object, Object, Context)}
|
|
for each key/value pair in the <code>InputSplit</code>. Finally
|
|
{@link #cleanup(Context)} is called.</p>
|
|
|
|
<p>All intermediate values associated with a given output key are
|
|
subsequently grouped by the framework, and passed to a {@link Reducer} to
|
|
determine the final output. Users can control the sorting and grouping by
|
|
specifying two key {@link RawComparator} classes.</p>
|
|
|
|
<p>The <code>Mapper</code> outputs are partitioned per
|
|
<code>Reducer</code>. Users can control which keys (and hence records) go to
|
|
which <code>Reducer</code> by implementing a custom {@link Partitioner}.</p>
|
|
|
|
<p>Users can optionally specify a <code>combiner</code>, via
|
|
{@link Job#setCombinerClass(Class)}, to perform local aggregation of the
|
|
intermediate outputs, which helps to cut down the amount of data transferred
|
|
from the <code>Mapper</code> to the <code>Reducer</code>.</p>
|
|
|
|
<p>Applications can specify if and how the intermediate
|
|
outputs are to be compressed and which {@link CompressionCodec}s are to be
|
|
used via the <code>Configuration</code>.</p>
|
|
|
|
<p>If the job has zero
|
|
reduces then the output of the <code>Mapper</code> is directly written
|
|
to the {@link OutputFormat} without sorting by keys.</p>
|
|
|
|
<p>Example:</p>
|
|
<p><blockquote><pre>
|
|
public class TokenCounterMapper
|
|
extends Mapper<Object, Text, Text, IntWritable>{
|
|
|
|
private final static IntWritable one = new IntWritable(1);
|
|
private Text word = new Text();
|
|
|
|
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
|
|
StringTokenizer itr = new StringTokenizer(value.toString());
|
|
while (itr.hasMoreTokens()) {
|
|
word.set(itr.nextToken());
|
|
context.write(word, one);
|
|
}
|
|
}
|
|
}
|
|
</pre></blockquote></p>
|
|
|
|
<p>Applications may override the {@link #run(Context)} method to exert
|
|
greater control on map processing e.g. multi-threaded <code>Mapper</code>s
|
|
etc.</p>
|
|
|
|
@see InputFormat
|
|
@see JobContext
|
|
@see Partitioner
|
|
@see Reducer]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Mapper -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Mapper.Context -->
|
|
<class name="Mapper.Context" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.MapContext"/>
|
|
<constructor name="Mapper.Context"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[The <code>Context</code> passed on to the {@link Mapper} implementations.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Mapper.Context -->
|
|
<!-- start class org.apache.hadoop.mapreduce.MarkableIterator -->
|
|
<class name="MarkableIterator" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.MarkableIteratorInterface"/>
|
|
<constructor name="MarkableIterator" type="java.util.Iterator"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a new iterator layered on the input iterator
|
|
@param itr underlying iterator that implements MarkableIteratorInterface]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="mark"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="clearMark"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="VALUE"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="remove"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>MarkableIterator</code> is a wrapper iterator class that
|
|
implements the {@link MarkableIteratorInterface}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.MarkableIterator -->
|
|
<!-- start class org.apache.hadoop.mapreduce.OutputCommitter -->
|
|
<class name="OutputCommitter" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="OutputCommitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setupJob"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For the framework to setup the job output during initialization
|
|
|
|
@param jobContext Context of the job whose output is being written.
|
|
@throws IOException if temporary output could not be created]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanupJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link #commitJob(JobContext)} or
|
|
{@link #abortJob(JobContext, JobStatus.State)} instead.">
|
|
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For cleaning up the job's output after job completion
|
|
|
|
@param jobContext Context of the job whose output is being written.
|
|
@throws IOException
|
|
@deprecated Use {@link #commitJob(JobContext)} or
|
|
{@link #abortJob(JobContext, JobStatus.State)} instead.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="commitJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For committing job's output after successful job completion. Note that this
|
|
is invoked for jobs with final runstate as SUCCESSFUL.
|
|
|
|
@param jobContext Context of the job whose output is being written.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="abortJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<param name="state" type="org.apache.hadoop.mapreduce.JobStatus.State"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For aborting an unsuccessful job's output. Note that this is invoked for
|
|
jobs with final runstate as {@link JobStatus.State#FAILED} or
|
|
{@link JobStatus.State#KILLED}.
|
|
|
|
@param jobContext Context of the job whose output is being written.
|
|
@param state final runstate of the job
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupTask"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Sets up output for the task.
|
|
|
|
@param taskContext Context of the task whose output is being written.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="needsTaskCommit" return="boolean"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Check whether task needs a commit
|
|
|
|
@param taskContext
|
|
@return true/false
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="commitTask"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[To promote the task's temporary output to final output location
|
|
|
|
The task's output is moved to the job's output directory.
|
|
|
|
@param taskContext Context of the task whose output is being written.
|
|
@throws IOException if commit is not successful]]>
|
|
</doc>
|
|
</method>
|
|
<method name="abortTask"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Discard the task output
|
|
|
|
@param taskContext
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>OutputCommitter</code> describes the commit of task output for a
|
|
Map-Reduce job.
|
|
|
|
<p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of
|
|
the job to:</p>
|
|
<ol>
|
|
<li>
|
|
Setup the job during initialization. For example, create the temporary
|
|
output directory for the job during the initialization of the job.
|
|
</li>
|
|
<li>
|
|
Cleanup the job after the job completion. For example, remove the
|
|
temporary output directory after the job completion.
|
|
</li>
|
|
<li>
|
|
Setup the task temporary output.
|
|
</li>
|
|
<li>
|
|
Check whether a task needs a commit. This is to avoid the commit
|
|
procedure if a task does not need commit.
|
|
</li>
|
|
<li>
|
|
Commit of the task output.
|
|
</li>
|
|
<li>
|
|
Discard the task commit.
|
|
</li>
|
|
</ol>
|
|
|
|
@see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
|
|
@see JobContext
|
|
@see TaskAttemptContext]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.OutputCommitter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.OutputFormat -->
|
|
<class name="OutputFormat" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="OutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link RecordWriter} for the given task.
|
|
|
|
@param context the information about the current task.
|
|
@return a {@link RecordWriter} to write the output for the job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Check for validity of the output-specification for the job.
|
|
|
|
<p>This is to validate the output specification for the job when it is
|
|
submitted. Typically checks that it does not already exist,
|
|
throwing an exception when it already exists, so that output is not
|
|
overwritten.</p>
|
|
|
|
@param context information about the job
|
|
@throws IOException when output should not be attempted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the output committer for this output format. This is responsible
|
|
for ensuring the output is committed correctly.
|
|
@param context the task context
|
|
@return an output committer
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>OutputFormat</code> describes the output-specification for a
|
|
Map-Reduce job.
|
|
|
|
<p>The Map-Reduce framework relies on the <code>OutputFormat</code> of the
|
|
job to:</p>
|
|
<ol>
|
|
<li>
|
|
Validate the output-specification of the job. For e.g. check that the
|
|
output directory doesn't already exist.</li>
|
|
<li>
|
|
Provide the {@link RecordWriter} implementation to be used to write out
|
|
the output files of the job. Output files are stored in a
|
|
{@link FileSystem}.
|
|
</li>
|
|
</ol>
|
|
|
|
@see RecordWriter]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.OutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Partitioner -->
|
|
<class name="Partitioner" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Partitioner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getPartition" return="int"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="KEY"/>
|
|
<param name="value" type="VALUE"/>
|
|
<param name="numPartitions" type="int"/>
|
|
<doc>
|
|
<![CDATA[Get the partition number for a given key (hence record) given the total
|
|
number of partitions i.e. number of reduce-tasks for the job.
|
|
|
|
<p>Typically a hash function on all or a subset of the key.</p>
|
|
|
|
@param key the key to be partitioned.
|
|
@param value the entry value.
|
|
@param numPartitions the total number of partitions.
|
|
@return the partition number for the <code>key</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Partitions the key space.
|
|
|
|
<p><code>Partitioner</code> controls the partitioning of the keys of the
|
|
intermediate map-outputs. The key (or a subset of the key) is used to derive
|
|
the partition, typically by a hash function. The total number of partitions
|
|
is the same as the number of reduce tasks for the job. Hence this controls
|
|
which of the <code>m</code> reduce tasks the intermediate key (and hence the
|
|
record) is sent to for reduction.</p>
|
|
|
|
@see Reducer]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Partitioner -->
|
|
<!-- start class org.apache.hadoop.mapreduce.QueueAclsInfo -->
|
|
<class name="QueueAclsInfo" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="QueueAclsInfo"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Default constructor for QueueAclsInfo.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="QueueAclsInfo" type="java.lang.String, java.lang.String[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a new QueueAclsInfo object using the queue name and the
|
|
queue operations array
|
|
|
|
@param queueName Name of the job queue
|
|
@param operations]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getQueueName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get queue name.
|
|
|
|
@return name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setQueueName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="queueName" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getOperations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get operations allowed on queue.
|
|
|
|
@return array of String]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Class to encapsulate Queue ACLs for a particular
|
|
user.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.QueueAclsInfo -->
|
|
<!-- start class org.apache.hadoop.mapreduce.QueueInfo -->
|
|
<class name="QueueInfo" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="QueueInfo"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Default constructor for QueueInfo.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="QueueInfo" type="java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a new QueueInfo object using the queue name and the
|
|
scheduling information passed.
|
|
|
|
@param queueName Name of the job queue
|
|
@param schedulingInfo Scheduling Information associated with the job
|
|
queue]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="QueueInfo" type="java.lang.String, java.lang.String, org.apache.hadoop.mapreduce.QueueState, org.apache.hadoop.mapreduce.JobStatus[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@param queueName
|
|
@param schedulingInfo
|
|
@param state
|
|
@param stats]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="setQueueName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="queueName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the queue name of the JobQueueInfo
|
|
|
|
@param queueName Name of the job queue.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getQueueName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the queue name from JobQueueInfo
|
|
|
|
@return queue name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSchedulingInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="schedulingInfo" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the scheduling information associated with a particular job queue
|
|
|
|
@param schedulingInfo]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSchedulingInfo" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Gets the scheduling information associated with a particular job queue.
|
|
If nothing is set, returns <b>"N/A"</b>
|
|
|
|
@return Scheduling information associated with a particular Job Queue]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setState"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="state" type="org.apache.hadoop.mapreduce.QueueState"/>
|
|
<doc>
|
|
<![CDATA[Set the state of the queue
|
|
@param state state of the queue.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getState" return="org.apache.hadoop.mapreduce.QueueState"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the queue state
|
|
@return the queue state.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobStatuses"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="stats" type="org.apache.hadoop.mapreduce.JobStatus[]"/>
|
|
</method>
|
|
<method name="getQueueChildren" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get immediate children.
|
|
|
|
@return list of QueueInfo]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setQueueChildren"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="children" type="java.util.List"/>
|
|
</method>
|
|
<method name="getProperties" return="java.util.Properties"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get properties.
|
|
|
|
@return Properties]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setProperties"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="props" type="java.util.Properties"/>
|
|
</method>
|
|
<method name="getJobStatuses" return="org.apache.hadoop.mapreduce.JobStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the jobs submitted to queue
|
|
@return list of JobStatus for the submitted jobs]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Class that contains the information regarding the Job Queues which are
|
|
maintained by the Hadoop Map/Reduce framework.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.QueueInfo -->
|
|
<!-- start class org.apache.hadoop.mapreduce.QueueState -->
|
|
<class name="QueueState" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapreduce.QueueState[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapreduce.QueueState"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getStateName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the stateName]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getState" return="org.apache.hadoop.mapreduce.QueueState"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="state" type="java.lang.String"/>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Enum representing queue state]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.QueueState -->
|
|
<!-- start class org.apache.hadoop.mapreduce.RecordReader -->
|
|
<class name="RecordReader" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.io.Closeable"/>
|
|
<constructor name="RecordReader"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="initialize"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Called once at initialization.
|
|
@param split the split that defines the range of records to read
|
|
@param context the information about the task
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Read the next key, value pair.
|
|
@return true if a key/value pair was read
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentKey" return="KEYIN"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the current key
|
|
@return the current key or null if there is no current key
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentValue" return="VALUEIN"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the current value.
|
|
@return the object that was read
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[The current progress of the record reader through its data.
|
|
@return a number between 0.0 and 1.0 that is the fraction of the data read
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close the record reader.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The record reader breaks the data into key/value pairs for input to the
|
|
{@link Mapper}.
|
|
@param <KEYIN>
|
|
@param <VALUEIN>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.RecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.RecordWriter -->
|
|
<class name="RecordWriter" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="RecordWriter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="write"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Writes a key/value pair.
|
|
|
|
@param key the key to write.
|
|
@param value the value to write.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Close this <code>RecordWriter</code> to future operations.
|
|
|
|
@param context the context of the task
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>RecordWriter</code> writes the output <key, value> pairs
|
|
to an output file.
|
|
|
|
<p><code>RecordWriter</code> implementations write the job outputs to the
|
|
{@link FileSystem}.
|
|
|
|
@see OutputFormat]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.RecordWriter -->
|
|
<!-- start interface org.apache.hadoop.mapreduce.ReduceContext -->
|
|
<interface name="ReduceContext" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.TaskInputOutputContext"/>
|
|
<method name="nextKey" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Start processing next unique key.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getValues" return="java.lang.Iterable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Iterate through the values for the current key, reusing the same value
|
|
object, which is stored in the context.
|
|
@return the series of values associated with the current key. All of the
|
|
objects returned directly and indirectly from this method are reused.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The context passed to the {@link Reducer}.
|
|
@param <KEYIN> the class of the input keys
|
|
@param <VALUEIN> the class of the input values
|
|
@param <KEYOUT> the class of the output keys
|
|
@param <VALUEOUT> the class of the output values]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapreduce.ReduceContext -->
|
|
<!-- start interface org.apache.hadoop.mapreduce.ReduceContext.ValueIterator -->
|
|
<interface name="ReduceContext.ValueIterator" abstract="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.MarkableIteratorInterface"/>
|
|
<method name="resetBackupStore"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method is called when the reducer moves from one key to
|
|
another.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link Iterator} to iterate over values for a given group of records.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapreduce.ReduceContext.ValueIterator -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Reducer -->
|
|
<class name="Reducer" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Reducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Called once at the start of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="KEYIN"/>
|
|
<param name="values" type="java.lang.Iterable"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[This method is called once for each key. Most applications will define
|
|
their reduce class by overriding this method. The default implementation
|
|
is an identity function.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Called once at the end of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Advanced application writers can use the
|
|
{@link #run(org.apache.hadoop.mapreduce.Reducer.Context)} method to
|
|
control how the reduce task works.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Reduces a set of intermediate values which share a key to a smaller set of
|
|
values.
|
|
|
|
<p><code>Reducer</code> implementations
|
|
can access the {@link Configuration} for the job via the
|
|
{@link JobContext#getConfiguration()} method.</p>
|
|
|
|
<p><code>Reducer</code> has 3 primary phases:</p>
|
|
<ol>
|
|
<li>
|
|
|
|
<h4 id="Shuffle">Shuffle</h4>
|
|
|
|
<p>The <code>Reducer</code> copies the sorted output from each
|
|
{@link Mapper} using HTTP across the network.</p>
|
|
</li>
|
|
|
|
<li>
|
|
<h4 id="Sort">Sort</h4>
|
|
|
|
<p>The framework merge sorts <code>Reducer</code> inputs by
|
|
<code>key</code>s
|
|
(since different <code>Mapper</code>s may have output the same key).</p>
|
|
|
|
<p>The shuffle and sort phases occur simultaneously i.e. while outputs are
|
|
being fetched they are merged.</p>
|
|
|
|
<h5 id="SecondarySort">SecondarySort</h5>
|
|
|
|
<p>To achieve a secondary sort on the values returned by the value
|
|
iterator, the application should extend the key with the secondary
|
|
key and define a grouping comparator. The keys will be sorted using the
|
|
entire key, but will be grouped using the grouping comparator to decide
|
|
which keys and values are sent in the same call to reduce. The grouping
|
|
comparator is specified via
|
|
{@link Job#setGroupingComparatorClass(Class)}. The sort order is
|
|
controlled by
|
|
{@link Job#setSortComparatorClass(Class)}.</p>
|
|
|
|
|
|
For example, say that you want to find duplicate web pages and tag them
|
|
all with the url of the "best" known example. You would set up the job
|
|
like:
|
|
<ul>
|
|
<li>Map Input Key: url</li>
|
|
<li>Map Input Value: document</li>
|
|
<li>Map Output Key: document checksum, url pagerank</li>
|
|
<li>Map Output Value: url</li>
|
|
<li>Partitioner: by checksum</li>
|
|
<li>OutputKeyComparator: by checksum and then decreasing pagerank</li>
|
|
<li>OutputValueGroupingComparator: by checksum</li>
|
|
</ul>
|
|
</li>
|
|
|
|
<li>
|
|
<h4 id="Reduce">Reduce</h4>
|
|
|
|
<p>In this phase the
|
|
{@link #reduce(Object, Iterable, Context)}
|
|
method is called for each <code><key, (collection of values)></code> in
|
|
the sorted inputs.</p>
|
|
<p>The output of the reduce task is typically written to a
|
|
{@link RecordWriter} via
|
|
{@link Context#write(Object, Object)}.</p>
|
|
</li>
|
|
</ol>
|
|
|
|
<p>The output of the <code>Reducer</code> is <b>not re-sorted</b>.</p>
|
|
|
|
<p>Example:</p>
|
|
<p><blockquote><pre>
|
|
public class IntSumReducer<Key> extends Reducer<Key,IntWritable,
|
|
Key,IntWritable> {
|
|
private IntWritable result = new IntWritable();
|
|
|
|
public void reduce(Key key, Iterable<IntWritable> values,
|
|
Context context) throws IOException, InterruptedException {
|
|
int sum = 0;
|
|
for (IntWritable val : values) {
|
|
sum += val.get();
|
|
}
|
|
result.set(sum);
|
|
context.write(key, result);
|
|
}
|
|
}
|
|
</pre></blockquote></p>
|
|
|
|
@see Mapper
|
|
@see Partitioner]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Reducer -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Reducer.Context -->
|
|
<class name="Reducer.Context" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.ReduceContext"/>
|
|
<constructor name="Reducer.Context"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[The <code>Context</code> passed on to the {@link Reducer} implementations.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Reducer.Context -->
|
|
<!-- start interface org.apache.hadoop.mapreduce.TaskAttemptContext -->
|
|
<interface name="TaskAttemptContext" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<implements name="org.apache.hadoop.util.Progressable"/>
|
|
<method name="getTaskAttemptID" return="org.apache.hadoop.mapreduce.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the unique name for this task attempt.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="msg" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the current status of the task to the given string.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getStatus" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the last set status message.
|
|
@return the current status message]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The context for task attempts.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapreduce.TaskAttemptContext -->
|
|
<!-- start class org.apache.hadoop.mapreduce.TaskAttemptID -->
|
|
<class name="TaskAttemptID" extends="org.apache.hadoop.mapred.ID"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskAttemptID" type="org.apache.hadoop.mapreduce.TaskID, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskAttemptID object from given {@link TaskID}.
|
|
@param taskId TaskID that this task belongs to
|
|
@param id the task attempt number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskAttemptID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskAttemptID object from given parts.
|
|
@param jtIdentifier jobTracker identifier
|
|
@param jobId job number
|
|
@param type the TaskType
|
|
@param taskId taskId number
|
|
@param id the task attempt number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskAttemptID"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the {@link JobID} object that this task attempt belongs to]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskID" return="org.apache.hadoop.mapreduce.TaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the {@link TaskID} object that this task attempt belongs to]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskType" return="org.apache.hadoop.mapreduce.TaskType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the TaskType of the TaskAttemptID]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="appendTo" return="java.lang.StringBuilder"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="builder" type="java.lang.StringBuilder"/>
|
|
<doc>
|
|
<![CDATA[Add the unique string to the StringBuilder
|
|
@param builder the builder to append to
|
|
@return the builder that was passed in.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="org.apache.hadoop.mapreduce.ID"/>
|
|
<doc>
|
|
<![CDATA[Compare TaskIds by first tipIds, then by task numbers.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="forName" return="org.apache.hadoop.mapreduce.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="str" type="java.lang.String"/>
|
|
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
|
|
<doc>
|
|
<![CDATA[Construct a TaskAttemptID object from given string
|
|
@return constructed TaskAttemptID object or null if the given String is null
|
|
@throws IllegalArgumentException if the given string is malformed]]>
|
|
</doc>
|
|
</method>
|
|
<field name="ATTEMPT" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[TaskAttemptID represents the immutable and unique identifier for
|
|
a task attempt. Each task attempt is one particular instance of a Map or
|
|
Reduce Task identified by its TaskID.
|
|
|
|
TaskAttemptID consists of 2 parts. First part is the
|
|
{@link TaskID}, that this TaskAttemptID belongs to.
|
|
Second part is the task attempt number. <br>
|
|
An example TaskAttemptID is :
|
|
<code>attempt_200707121733_0003_m_000005_0</code> , which represents the
|
|
zeroth task attempt for the fifth map task in the third job
|
|
running at the jobtracker started at <code>200707121733</code>.
|
|
<p>
|
|
Applications should never construct or parse TaskAttemptID strings
|
|
but rather use appropriate constructors or {@link #forName(String)}
|
|
method.
|
|
|
|
@see JobID
|
|
@see TaskID]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.TaskAttemptID -->
|
|
<!-- start class org.apache.hadoop.mapreduce.TaskCompletionEvent -->
|
|
<class name="TaskCompletionEvent" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="TaskCompletionEvent"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Default constructor for Writable.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskCompletionEvent" type="int, org.apache.hadoop.mapreduce.TaskAttemptID, int, boolean, org.apache.hadoop.mapreduce.TaskCompletionEvent.Status, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructor. eventId should be created externally and incremented
|
|
per event for each job.
|
|
@param eventId event id, event id should be unique and assigned
|
|
incrementally, starting from 0.
|
|
@param taskId task id
|
|
@param status task's status
|
|
@param taskTrackerHttp task tracker's host:port for http.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getEventId" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns event Id.
|
|
@return event id]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskAttemptId" return="org.apache.hadoop.mapreduce.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns task id.
|
|
@return task id]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getStatus" return="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the enum Status of this event, e.g. success or failure.
|
|
@return task tracker status]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskTrackerHttp" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[http location of the tasktracker where this task ran.
|
|
@return http location of tasktracker user logs]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskRunTime" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns time (in millisec) the task took to complete.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskRunTime"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="taskCompletionTime" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the task completion time
|
|
@param taskCompletionTime time (in millisec) the task took to complete]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setEventId"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="eventId" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set event Id. Should be assigned incrementally starting from 0.
|
|
@param eventId]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskAttemptId"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
|
|
<doc>
|
|
<![CDATA[Sets task id.
|
|
@param taskId]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="status" type="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status"/>
|
|
<doc>
|
|
<![CDATA[Set task status.
|
|
@param status]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskTrackerHttp"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="taskTrackerHttp" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set task tracker http location.
|
|
@param taskTrackerHttp]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="isMapTask" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="idWithinJob" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="EMPTY_ARRAY" type="org.apache.hadoop.mapreduce.TaskCompletionEvent[]"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This is used to track task completion events on
|
|
job tracker.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.TaskCompletionEvent -->
|
|
<!-- start class org.apache.hadoop.mapreduce.TaskCompletionEvent.Status -->
|
|
<class name="TaskCompletionEvent.Status" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.TaskCompletionEvent.Status -->
|
|
<!-- start class org.apache.hadoop.mapreduce.TaskCounter -->
|
|
<class name="TaskCounter" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapreduce.TaskCounter[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapreduce.TaskCounter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.TaskCounter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.TaskID -->
|
|
<class name="TaskID" extends="org.apache.hadoop.mapred.ID"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, org.apache.hadoop.mapreduce.TaskType, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskID object from given {@link JobID}.
|
|
@param jobId JobID that this tip belongs to
|
|
@param type the {@link TaskType} of the task
|
|
@param id the tip number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskID object from given parts.
|
|
@param jtIdentifier jobTracker identifier
|
|
@param jobId job number
|
|
@param type the TaskType
|
|
@param id the tip number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskID"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the {@link JobID} object that this tip belongs to]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskType" return="org.apache.hadoop.mapreduce.TaskType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the type of the task]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="org.apache.hadoop.mapreduce.ID"/>
|
|
<doc>
|
|
<![CDATA[Compare TaskInProgressIds by first jobIds, then by tip numbers. Reduces are
|
|
defined as greater than maps.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="appendTo" return="java.lang.StringBuilder"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="builder" type="java.lang.StringBuilder"/>
|
|
<doc>
|
|
<![CDATA[Add the unique string to the given builder.
|
|
@param builder the builder to append to
|
|
@return the builder that was passed in]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="forName" return="org.apache.hadoop.mapreduce.TaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="str" type="java.lang.String"/>
|
|
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
|
|
<doc>
|
|
<![CDATA[Construct a TaskID object from given string
|
|
@return constructed TaskID object or null if the given String is null
|
|
@throws IllegalArgumentException if the given string is malformed]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRepresentingCharacter" return="char"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="type" type="org.apache.hadoop.mapreduce.TaskType"/>
|
|
<doc>
|
|
<![CDATA[Gets the character representing the {@link TaskType}
|
|
@param type the TaskType
|
|
@return the character]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskType" return="org.apache.hadoop.mapreduce.TaskType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="c" type="char"/>
|
|
<doc>
|
|
<![CDATA[Gets the {@link TaskType} corresponding to the character
|
|
@param c the character
|
|
@return the TaskType]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getAllTaskTypes" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<field name="TASK" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="idFormat" type="java.text.NumberFormat"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[TaskID represents the immutable and unique identifier for
|
|
a Map or Reduce Task. Each TaskID encompasses multiple attempts made to
|
|
execute the Map or Reduce Task, each of which is uniquely identified by
|
|
their TaskAttemptID.
|
|
|
|
TaskID consists of 3 parts. First part is the {@link JobID}, that this
|
|
TaskInProgress belongs to. Second part of the TaskID is either 'm' or 'r'
|
|
representing whether the task is a map task or a reduce task.
|
|
And the third part is the task number. <br>
|
|
An example TaskID is :
|
|
<code>task_200707121733_0003_m_000005</code> , which represents the
|
|
fifth map task in the third job running at the jobtracker
|
|
started at <code>200707121733</code>.
|
|
<p>
|
|
Applications should never construct or parse TaskID strings
|
|
, but rather use appropriate constructors or {@link #forName(String)}
|
|
method.
|
|
|
|
@see JobID
|
|
@see TaskAttemptID]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.TaskID -->
|
|
<!-- start interface org.apache.hadoop.mapreduce.TaskInputOutputContext -->
|
|
<interface name="TaskInputOutputContext" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Advance to the next key, value pair, returning false if at end.
|
|
@return true if a key/value pair was read, false if no more]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentKey" return="KEYIN"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the current key.
|
|
@return the current key object or null if there isn't one
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentValue" return="VALUEIN"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the current value.
|
|
@return the value object that was read into
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="KEYOUT"/>
|
|
<param name="value" type="VALUEOUT"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Generate an output key/value pair.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="counterName" type="java.lang.Enum"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Counter} for the given <code>counterName</code>.
|
|
@param counterName counter name
|
|
@return the <code>Counter</code> for the given <code>counterName</code>]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="groupName" type="java.lang.String"/>
|
|
<param name="counterName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Counter} for the given <code>groupName</code> and
|
|
<code>counterName</code>.
|
|
@param counterName counter name
|
|
@return the <code>Counter</code> for the given <code>groupName</code> and
|
|
<code>counterName</code>]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link OutputCommitter} for the task-attempt.
|
|
@return the <code>OutputCommitter</code> for the task-attempt]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A context object that allows input and output from the task. It is only
|
|
supplied to the {@link Mapper} or {@link Reducer}.
|
|
@param <KEYIN> the input key type for the task
|
|
@param <VALUEIN> the input value type for the task
|
|
@param <KEYOUT> the output key type for the task
|
|
@param <VALUEOUT> the output value type for the task]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapreduce.TaskInputOutputContext -->
|
|
<!-- start class org.apache.hadoop.mapreduce.TaskTrackerInfo -->
|
|
<class name="TaskTrackerInfo" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="TaskTrackerInfo"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="TaskTrackerInfo" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="TaskTrackerInfo" type="java.lang.String, java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getTaskTrackerName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Gets the tasktracker's name.
|
|
|
|
@return tracker's name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isBlacklisted" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Whether tracker is blacklisted
|
|
@return true if tracker is blacklisted
|
|
false otherwise]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReasonForBlacklist" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Gets the reason for which the tasktracker was blacklisted.
|
|
|
|
@return reason which tracker was blacklisted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getBlacklistReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Gets a descriptive report about why the tasktracker was blacklisted.
|
|
|
|
@return report describing why the tasktracker was blacklisted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Information about TaskTracker.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.TaskTrackerInfo -->
|
|
<!-- start class org.apache.hadoop.mapreduce.TaskType -->
|
|
<class name="TaskType" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapreduce.TaskType[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapreduce.TaskType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Enum for map, reduce, job-setup, job-cleanup, task-cleanup task types.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.TaskType -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.aggregate">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum -->
|
|
<class name="DoubleValueSum" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="DoubleValueSum"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
an object whose string representation represents a double value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="double"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
a double value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSum" return="double"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return an array of one element. The element is a string
|
|
representation of the aggregated value. The return value is
|
|
expected to be used by a combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that sums up a sequence of double
|
|
values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax -->
|
|
<class name="LongValueMax" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="LongValueMax"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[the default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
an object whose string representation represents a long value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="newVal" type="long"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param newVal
|
|
a long value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getVal" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return an array of one element. The element is a string
|
|
representation of the aggregated value. The return value is
|
|
expected to be used by a combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that maintains the maximum of
|
|
a sequence of long values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin -->
|
|
<class name="LongValueMin" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="LongValueMin"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[the default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
an object whose string representation represents a long value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="newVal" type="long"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param newVal
|
|
a long value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getVal" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return an array of one element. The element is a string
|
|
representation of the aggregated value. The return value is
|
|
expected to be used by a combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that maintains the minimum of
|
|
a sequence of long values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum -->
|
|
<class name="LongValueSum" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="LongValueSum"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[the default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
an object whose string representation represents a long value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="long"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
a long value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSum" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return an array of one element. The element is a string
|
|
representation of the aggregated value. The return value is
|
|
expected to be used by a combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that sums up
|
|
a sequence of long values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax -->
|
|
<class name="StringValueMax" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="StringValueMax"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[the default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
a string.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getVal" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return an array of one element. The element is a string
|
|
representation of the aggregated value. The return value is
|
|
expected to be used by a combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that maintains the biggest of
|
|
a sequence of strings.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin -->
|
|
<class name="StringValueMin" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="StringValueMin"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[the default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
a string.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getVal" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return an array of one element. The element is a string
|
|
representation of the aggregated value. The return value is
|
|
expected to be used by a combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that maintains the smallest of
|
|
a sequence of strings.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount -->
|
|
<class name="UniqValueCount" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="UniqValueCount"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[the default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="UniqValueCount" type="long"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[constructor
|
|
@param maxNum the limit in the number of unique values to keep.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="setMaxItems" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="n" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the limit on the number of unique values
|
|
@param n the desired limit on the number of unique values
|
|
@return the new limit on the number of unique values]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
an object.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return the number of unique objects aggregated]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUniqueItems" return="java.util.Set"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the set of the unique objects]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return an array of the unique objects. The return value is
|
|
expected to be used by a combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="MAX_NUM_UNIQUE_VALUES" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that dedupes a sequence of objects.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor -->
|
|
<class name="UserDefinedValueAggregatorDescriptor" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"/>
|
|
<constructor name="UserDefinedValueAggregatorDescriptor" type="java.lang.String, org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@param className the class name of the user defined descriptor class
|
|
@param conf a configuration object used for descriptor configuration]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="createInstance" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="className" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Create an instance of the given class
|
|
@param className the name of the class
|
|
@return a dynamically created instance of the given class]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateKeyValPairs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Generate a list of aggregation-id/value pairs for the given
|
|
key/value pairs by delegating the invocation to the real object.
|
|
|
|
@param key
|
|
input key
|
|
@param val
|
|
input value
|
|
@return a list of aggregation id/value pairs. An aggregation id encodes an
|
|
aggregation type which is used to guide the way to aggregate the
|
|
value in the reduce/combiner phase of an Aggregate based job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of this object.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Do nothing.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="theAggregatorDescriptor" type="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class implements a wrapper for a user defined value
|
|
aggregator descriptor.
|
|
It serves two functions: One is to create an object of
|
|
ValueAggregatorDescriptor from the name of a user defined class
|
|
that may be dynamically loaded. The other is to
|
|
delegate invocations of generateKeyValPairs function to the created object.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor -->
|
|
<!-- start interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator -->
|
|
<interface name="ValueAggregator" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val the value to be added]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return an array of values as the outputs of the combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This interface defines the minimal protocol for value aggregators.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor -->
|
|
<class name="ValueAggregatorBaseDescriptor" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"/>
|
|
<constructor name="ValueAggregatorBaseDescriptor"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="generateEntry" return="java.util.Map.Entry"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="type" type="java.lang.String"/>
|
|
<param name="id" type="java.lang.String"/>
|
|
<param name="val" type="org.apache.hadoop.io.Text"/>
|
|
<doc>
|
|
<![CDATA[@param type the aggregation type
|
|
@param id the aggregation id
|
|
@param val the val associated with the id to be aggregated
|
|
@return an Entry whose key is the aggregation id prefixed with
|
|
the aggregation type.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateValueAggregator" return="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="type" type="java.lang.String"/>
|
|
<param name="uniqCount" type="long"/>
|
|
<doc>
|
|
<![CDATA[@param type the aggregation type
|
|
@param uniqCount the limit in the number of unique values to keep,
|
|
if type is UNIQ_VALUE_COUNT
|
|
@return a value aggregator of the given type.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateKeyValPairs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Generate 1 or 2 aggregation-id/value pairs for the given key/value pair.
|
|
The first id will be of type LONG_VALUE_SUM, with "record_count" as
|
|
its aggregation id. If the input is a file split,
|
|
the second id of the same type will be generated too, with the file name
|
|
as its aggregation id. This achieves the behavior of counting the total
|
|
number of records in the input data, and the number of records
|
|
in each input file.
|
|
|
|
@param key
|
|
input key
|
|
@param val
|
|
input value
|
|
@return a list of aggregation id/value pairs. An aggregation id encodes an
|
|
aggregation type which is used to guide the way to aggregate the
|
|
value in the reduce/combiner phase of an Aggregate based job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[get the input file name.
|
|
|
|
@param conf a configuration object]]>
|
|
</doc>
|
|
</method>
|
|
<field name="UNIQ_VALUE_COUNT" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="LONG_VALUE_SUM" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="DOUBLE_VALUE_SUM" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="VALUE_HISTOGRAM" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="LONG_VALUE_MAX" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="LONG_VALUE_MIN" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="STRING_VALUE_MAX" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="STRING_VALUE_MIN" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="inputFile" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class implements the common functionalities of
|
|
the subclasses of ValueAggregatorDescriptor class.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorCombiner -->
|
|
<class name="ValueAggregatorCombiner" extends="org.apache.hadoop.mapreduce.Reducer"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ValueAggregatorCombiner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.Text"/>
|
|
<param name="values" type="java.lang.Iterable"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Combines values for a given key.
|
|
@param key the key is expected to be a Text object, whose prefix indicates
|
|
the type of aggregation to aggregate the values.
|
|
@param values the values to combine
|
|
@param context to collect combined values]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements the generic combiner of Aggregate.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorCombiner -->
|
|
<!-- start interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor -->
|
|
<interface name="ValueAggregatorDescriptor" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="generateKeyValPairs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Generate a list of aggregation-id/value pairs for
|
|
the given key/value pair.
|
|
This function is usually called by the mapper of an Aggregate based job.
|
|
|
|
@param key
|
|
input key
|
|
@param val
|
|
input value
|
|
@return a list of aggregation id/value pairs. An aggregation id encodes an
|
|
aggregation type which is used to guide the way to aggregate the
|
|
value in the reduce/combiner phase of an Aggregate based job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Configure the object
|
|
|
|
@param conf
|
|
a Configuration object that may contain the information
|
|
that can be used to configure the object.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="TYPE_SEPARATOR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="ONE" type="org.apache.hadoop.io.Text"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This interface defines the contract a value aggregator descriptor must
|
|
support. Such a descriptor can be configured with a {@link Configuration}
|
|
object. Its main function is to generate a list of aggregation-id/value
|
|
pairs. An aggregation id encodes an aggregation type which is used to
|
|
guide the way to aggregate the value in the reduce/combiner phase of an
|
|
Aggregate based job.
|
|
The mapper in an Aggregate based map/reduce job may create one or more of
|
|
ValueAggregatorDescriptor objects at configuration time. For each input
|
|
key/value pair, the mapper will use those objects to create aggregation
|
|
id/value pairs.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob -->
|
|
<class name="ValueAggregatorJob" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ValueAggregatorJob"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="createValueAggregatorJobs" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<param name="descriptors" type="java.lang.Class[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="createValueAggregatorJobs" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="createValueAggregatorJob" return="org.apache.hadoop.mapreduce.Job"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create an Aggregate based map/reduce job.
|
|
|
|
@param conf The configuration for job
|
|
@param args the arguments used for job creation. Generic hadoop
|
|
arguments are accepted.
|
|
@return a Job object ready for submission.
|
|
|
|
@throws IOException
|
|
@see GenericOptionsParser]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValueAggregatorJob" return="org.apache.hadoop.mapreduce.Job"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<param name="descriptors" type="java.lang.Class[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setAggregatorDescriptors" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="descriptors" type="java.lang.Class[]"/>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[create and run an Aggregate based map/reduce job.
|
|
|
|
@param args the arguments used for job creation
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This is the main class for creating a map/reduce job using Aggregate
|
|
framework. The Aggregate is a specialization of map/reduce framework,
|
|
specializing for performing various simple aggregations.
|
|
|
|
Generally speaking, in order to implement an application using Map/Reduce
|
|
model, the developer is to implement Map and Reduce functions (and possibly
|
|
combine function). However, a lot of applications related to counting and
|
|
statistics computing have very similar characteristics. Aggregate abstracts
|
|
out the general patterns of these functions and implements those patterns.
|
|
In particular, the package provides generic mapper/reducer/combiner
|
|
classes, and a set of built-in value aggregators, and a generic utility
|
|
class that helps user create map/reduce jobs using the generic class.
|
|
The built-in aggregators include:
|
|
|
|
sum over numeric values count the number of distinct values compute the
|
|
histogram of values compute the minimum, maximum, median, average, standard
|
|
deviation of numeric values
|
|
|
|
The developer using Aggregate will need only to provide a plugin class
|
|
conforming to the following interface:
|
|
|
|
public interface ValueAggregatorDescriptor { public ArrayList<Entry>
|
|
generateKeyValPairs(Object key, Object value); public void
|
|
configure(Configuration conf); }
|
|
|
|
The package also provides a base class, ValueAggregatorBaseDescriptor,
|
|
implementing the above interface. The user can extend the base class and
|
|
implement generateKeyValPairs accordingly.
|
|
|
|
The primary work of generateKeyValPairs is to emit one or more key/value
|
|
pairs based on the input key/value pair. The key in an output key/value pair
|
|
encodes two pieces of information: aggregation type and aggregation id. The
|
|
value will be aggregated onto the aggregation id according to the aggregation
|
|
type.
|
|
|
|
This class offers a function to generate a map/reduce job using Aggregate
|
|
framework. The function takes the following parameters: input directory spec
|
|
input format (text or sequence file) output directory a file specifying the
|
|
user plugin class]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJobBase -->
|
|
<class name="ValueAggregatorJobBase" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ValueAggregatorJobBase"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="getValueAggregatorDescriptor" return="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="spec" type="java.lang.String"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="getAggregatorDescriptors" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="logSpec"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<field name="DESCRIPTOR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="DESCRIPTOR_NUM" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="USER_JAR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="aggregatorDescriptorList" type="java.util.ArrayList"
|
|
transient="false" volatile="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This abstract class implements some common functionalities of the
|
|
generic mapper, reducer and combiner classes of Aggregate.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJobBase -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorMapper -->
|
|
<class name="ValueAggregatorMapper" extends="org.apache.hadoop.mapreduce.Mapper"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ValueAggregatorMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K1"/>
|
|
<param name="value" type="V1"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[the map function. It iterates through the value aggregator descriptor
|
|
list to generate aggregation id/value pairs and emit them.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements the generic mapper of Aggregate.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorMapper -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorReducer -->
|
|
<class name="ValueAggregatorReducer" extends="org.apache.hadoop.mapreduce.Reducer"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ValueAggregatorReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.Text"/>
|
|
<param name="values" type="java.lang.Iterable"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[@param key
|
|
the key is expected to be a Text object, whose prefix indicates
|
|
the type of aggregation to aggregate the values. In effect, data
|
|
driven computing is achieved. It is assumed that each aggregator's
|
|
getReport method emits appropriate output for the aggregator. This
|
|
may be further customized.
|
|
@param values the values to be aggregated
|
|
@param context]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements the generic reducer of Aggregate.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorReducer -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram -->
|
|
<class name="ValueHistogram" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="ValueHistogram"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add the given val to the aggregator.
|
|
|
|
@param val the value to be added. It is expected to be a string
|
|
in the form of xxxx\tnum, meaning xxxx has num occurrences.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of this aggregator.
|
|
It includes the following basic statistics of the histogram:
|
|
the number of unique values
|
|
the minimum value
|
|
the median value
|
|
the maximum value
|
|
the average value
|
|
the standard deviation]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReportDetails" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return a string representation of the list of value/frequency pairs of
|
|
the histogram]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return a list of value/frequency pairs.
|
|
The return value is expected to be used by the reducer.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReportItems" return="java.util.TreeMap"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return a TreeMap representation of the histogram]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that computes the
|
|
histogram of a sequence of strings.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.chain">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.chain.ChainMapper -->
|
|
<class name="ChainMapper" extends="org.apache.hadoop.mapreduce.Mapper"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ChainMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="addMapper"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="klass" type="java.lang.Class"/>
|
|
<param name="inputKeyClass" type="java.lang.Class"/>
|
|
<param name="inputValueClass" type="java.lang.Class"/>
|
|
<param name="outputKeyClass" type="java.lang.Class"/>
|
|
<param name="outputValueClass" type="java.lang.Class"/>
|
|
<param name="mapperConf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Adds a {@link Mapper} class to the chain mapper.
|
|
|
|
<p>
|
|
The key and values are passed from one element of the chain to the next, by
|
|
value. For the added Mapper the configuration given for it,
|
|
<code>mapperConf</code>, have precedence over the job's Configuration. This
|
|
precedence is in effect when the task is running.
|
|
</p>
|
|
<p>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainMapper, this is done by the addMapper for the last mapper in the chain
|
|
</p>
|
|
|
|
@param job
|
|
The job.
|
|
@param klass
|
|
the Mapper class to add.
|
|
@param inputKeyClass
|
|
mapper input key class.
|
|
@param inputValueClass
|
|
mapper input value class.
|
|
@param outputKeyClass
|
|
mapper output key class.
|
|
@param outputValueClass
|
|
mapper output value class.
|
|
@param mapperConf
|
|
a configuration for the Mapper class. It is recommended to use a
|
|
Configuration without default values using the
|
|
<code>Configuration(boolean loadDefaults)</code> constructor with
|
|
FALSE.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The ChainMapper class allows to use multiple Mapper classes within a single
|
|
Map task.
|
|
|
|
<p>
|
|
The Mapper classes are invoked in a chained (or piped) fashion, the output of
|
|
the first becomes the input of the second, and so on until the last Mapper,
|
|
the output of the last Mapper will be written to the task's output.
|
|
</p>
|
|
<p>
|
|
The key functionality of this feature is that the Mappers in the chain do not
|
|
need to be aware that they are executed in a chain. This enables having
|
|
reusable specialized Mappers that can be combined to perform composite
|
|
operations within a single task.
|
|
</p>
|
|
<p>
|
|
Special care has to be taken when creating chains that the key/values output
|
|
by a Mapper are valid for the following Mapper in the chain. It is assumed
|
|
all Mappers and the Reduce in the chain use matching output and input key and
|
|
value classes as no conversion is done by the chaining code.
|
|
</p>
|
|
<p>
|
|
Using the ChainMapper and the ChainReducer classes it is possible to compose
|
|
Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An
|
|
immediate benefit of this pattern is a dramatic reduction in disk IO.
|
|
</p>
|
|
<p>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainMapper, this is done by the addMapper for the last mapper in the chain.
|
|
</p>
|
|
ChainMapper usage pattern:
|
|
<p/>
|
|
|
|
<pre>
|
|
...
|
|
Job job = new Job(conf);
|
|
<p/>
|
|
Configuration mapAConf = new Configuration(false);
|
|
...
|
|
ChainMapper.addMapper(job, AMap.class, LongWritable.class, Text.class,
|
|
Text.class, Text.class, mapAConf);
|
|
<p/>
|
|
Configuration mapBConf = new Configuration(false);
|
|
...
|
|
ChainMapper.addMapper(job, BMap.class, Text.class, Text.class,
|
|
LongWritable.class, Text.class, mapBConf);
|
|
<p/>
|
|
...
|
|
<p/>
|
|
job.waitForCompletion(true);
|
|
...
|
|
</pre>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.chain.ChainMapper -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.chain.ChainReducer -->
|
|
<class name="ChainReducer" extends="org.apache.hadoop.mapreduce.Reducer"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ChainReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setReducer"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="klass" type="java.lang.Class"/>
|
|
<param name="inputKeyClass" type="java.lang.Class"/>
|
|
<param name="inputValueClass" type="java.lang.Class"/>
|
|
<param name="outputKeyClass" type="java.lang.Class"/>
|
|
<param name="outputValueClass" type="java.lang.Class"/>
|
|
<param name="reducerConf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Sets the {@link Reducer} class to the chain job.
|
|
|
|
<p>
|
|
The key and values are passed from one element of the chain to the next, by
|
|
value. For the added Reducer the configuration given for it,
|
|
<code>reducerConf</code>, has precedence over the job's Configuration.
|
|
This precedence is in effect when the task is running.
|
|
</p>
|
|
<p>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainReducer, this is done by the setReducer or the addMapper for the last
|
|
element in the chain.
|
|
</p>
|
|
|
|
@param job
|
|
the job
|
|
@param klass
|
|
the Reducer class to add.
|
|
@param inputKeyClass
|
|
reducer input key class.
|
|
@param inputValueClass
|
|
reducer input value class.
|
|
@param outputKeyClass
|
|
reducer output key class.
|
|
@param outputValueClass
|
|
reducer output value class.
|
|
@param reducerConf
|
|
a configuration for the Reducer class. It is recommended to use a
|
|
Configuration without default values using the
|
|
<code>Configuration(boolean loadDefaults)</code> constructor with
|
|
FALSE.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addMapper"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="klass" type="java.lang.Class"/>
|
|
<param name="inputKeyClass" type="java.lang.Class"/>
|
|
<param name="inputValueClass" type="java.lang.Class"/>
|
|
<param name="outputKeyClass" type="java.lang.Class"/>
|
|
<param name="outputValueClass" type="java.lang.Class"/>
|
|
<param name="mapperConf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Adds a {@link Mapper} class to the chain reducer.
|
|
|
|
<p>
|
|
The key and values are passed from one element of the chain to the next, by
|
|
value. For the added Mapper the configuration given for it,
|
|
<code>mapperConf</code>, has precedence over the job's Configuration. This
|
|
precedence is in effect when the task is running.
|
|
</p>
|
|
<p>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainMapper, this is done by the addMapper for the last mapper in the
|
|
chain.
|
|
</p>
|
|
|
|
@param job
|
|
The job.
|
|
@param klass
|
|
the Mapper class to add.
|
|
@param inputKeyClass
|
|
mapper input key class.
|
|
@param inputValueClass
|
|
mapper input value class.
|
|
@param outputKeyClass
|
|
mapper output key class.
|
|
@param outputValueClass
|
|
mapper output value class.
|
|
@param mapperConf
|
|
a configuration for the Mapper class. It is recommended to use a
|
|
Configuration without default values using the
|
|
<code>Configuration(boolean loadDefaults)</code> constructor with
|
|
FALSE.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The ChainReducer class allows to chain multiple Mapper classes after a
|
|
Reducer within the Reducer task.
|
|
|
|
<p>
|
|
For each record output by the Reducer, the Mapper classes are invoked in a
|
|
chained (or piped) fashion. The output of the reducer becomes the input of
|
|
the first mapper and output of first becomes the input of the second, and so
|
|
on until the last Mapper, the output of the last Mapper will be written to
|
|
the task's output.
|
|
</p>
|
|
<p>
|
|
The key functionality of this feature is that the Mappers in the chain do not
|
|
need to be aware that they are executed after the Reducer or in a chain. This
|
|
enables having reusable specialized Mappers that can be combined to perform
|
|
composite operations within a single task.
|
|
</p>
|
|
<p>
|
|
Special care has to be taken when creating chains that the key/values output
|
|
by a Mapper are valid for the following Mapper in the chain. It is assumed
|
|
all Mappers and the Reduce in the chain use matching output and input key and
|
|
value classes as no conversion is done by the chaining code.
|
|
</p>
|
|
<p> Using the ChainMapper and the ChainReducer classes it is possible to
|
|
compose Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An
|
|
immediate benefit of this pattern is a dramatic reduction in disk IO. </p>
|
|
<p>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainReducer, this is done by the setReducer or the addMapper for the last
|
|
element in the chain.
|
|
</p>
|
|
ChainReducer usage pattern:
|
|
<p/>
|
|
|
|
<pre>
|
|
...
|
|
Job job = new Job(conf);
|
|
....
|
|
<p/>
|
|
Configuration reduceConf = new Configuration(false);
|
|
...
|
|
ChainReducer.setReducer(job, XReduce.class, LongWritable.class, Text.class,
|
|
Text.class, Text.class, reduceConf);
|
|
<p/>
|
|
ChainReducer.addMapper(job, CMap.class, Text.class, Text.class,
|
|
LongWritable.class, Text.class, null);
|
|
<p/>
|
|
ChainReducer.addMapper(job, DMap.class, LongWritable.class, Text.class,
|
|
LongWritable.class, LongWritable.class, null);
|
|
<p/>
|
|
...
|
|
<p/>
|
|
job.waitForCompletion(true);
|
|
...
|
|
</pre>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.chain.ChainReducer -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.db">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.BigDecimalSplitter -->
|
|
<class name="BigDecimalSplitter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/>
|
|
<constructor name="BigDecimalSplitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="split" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="results" type="java.sql.ResultSet"/>
|
|
<param name="colName" type="java.lang.String"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</method>
|
|
<method name="tryDivide" return="java.math.BigDecimal"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="numerator" type="java.math.BigDecimal"/>
|
|
<param name="denominator" type="java.math.BigDecimal"/>
|
|
<doc>
|
|
<![CDATA[Divide numerator by denominator. If impossible in exact mode, use rounding.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Implement DBSplitter over BigDecimal values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.BigDecimalSplitter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.BooleanSplitter -->
|
|
<class name="BooleanSplitter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/>
|
|
<constructor name="BooleanSplitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="split" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="results" type="java.sql.ResultSet"/>
|
|
<param name="colName" type="java.lang.String"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Implement DBSplitter over boolean values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.BooleanSplitter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat -->
|
|
<class name="DataDrivenDBInputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DBInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.conf.Configurable"/>
|
|
<constructor name="DataDrivenDBInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getSplitter" return="org.apache.hadoop.mapreduce.lib.db.DBSplitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="sqlDataType" type="int"/>
|
|
<doc>
|
|
<![CDATA[@return the DBSplitter implementation to use to divide the table/query into InputSplits.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplits" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getBoundingValsQuery" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return a query which returns the minimum and maximum values for
|
|
the order-by column.
|
|
|
|
The min value should be in the first column, and the
|
|
max value should be in the second column of the results.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setBoundingQuery"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="query" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the user-defined bounding query to use with a user-defined query.
|
|
This *must* include the substring "$CONDITIONS"
|
|
(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) inside the WHERE clause,
|
|
so that DataDrivenDBInputFormat knows where to insert split clauses.
|
|
e.g., "SELECT foo FROM mytable WHERE $CONDITIONS"
|
|
This will be expanded to something like:
|
|
SELECT foo FROM mytable WHERE (id > 100) AND (id < 250)
|
|
inside each split.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createDBRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setInput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="inputClass" type="java.lang.Class"/>
|
|
<param name="tableName" type="java.lang.String"/>
|
|
<param name="conditions" type="java.lang.String"/>
|
|
<param name="splitBy" type="java.lang.String"/>
|
|
<param name="fieldNames" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[Note that the "orderBy" column is called the "splitBy" in this version.
|
|
We reuse the same field, but it's not strictly ordering it -- just partitioning
|
|
the results.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="inputClass" type="java.lang.Class"/>
|
|
<param name="inputQuery" type="java.lang.String"/>
|
|
<param name="inputBoundingQuery" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[setInput() takes a custom query and a separate "bounding query" to use
|
|
instead of the custom "count query" used by DBInputFormat.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="SUBSTITUTE_TOKEN" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[If users are providing their own query, the following string is expected to
|
|
appear in the WHERE clause, which will be substituted with a pair of conditions
|
|
on the input to allow input splits to parallelise the import.]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[An InputFormat that reads input data from an SQL table.
|
|
Operates like DBInputFormat, but instead of using LIMIT and OFFSET to demarcate
|
|
splits, it tries to generate WHERE clauses which separate the data into roughly
|
|
equivalent shards.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit -->
|
|
<class name="DataDrivenDBInputFormat.DataDrivenDBInputSplit" extends="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="DataDrivenDBInputFormat.DataDrivenDBInputSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Default Constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="DataDrivenDBInputFormat.DataDrivenDBInputSplit" type="java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Convenience Constructor
|
|
@param lower the string to be put in the WHERE clause to guard on the 'lower' end
|
|
@param upper the string to be put in the WHERE clause to guard on the 'upper' end]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@return The total row count in this split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="input" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="output" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLowerClause" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getUpperClause" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An InputSplit that spans a set of rows]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader -->
|
|
<class name="DataDrivenDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DBRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="DataDrivenDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
<doc>
|
|
<![CDATA[@param split The InputSplit to read data for
|
|
@throws SQLException]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getSelectQuery" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the query for selecting the records,
|
|
subclasses can override this for custom behaviour.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A RecordReader that reads records from a SQL table,
|
|
using data-driven WHERE clause splits.
|
|
Emits LongWritables containing the record number as
|
|
key and DBWritables as value.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.DateSplitter -->
|
|
<class name="DateSplitter" extends="org.apache.hadoop.mapreduce.lib.db.IntegerSplitter"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="DateSplitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="split" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="results" type="java.sql.ResultSet"/>
|
|
<param name="colName" type="java.lang.String"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</method>
|
|
<method name="dateToString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="d" type="java.util.Date"/>
|
|
<doc>
|
|
<![CDATA[Given a Date 'd', format it as a string for use in a SQL date
|
|
comparison operation.
|
|
@param d the date to format.
|
|
@return the string representing this date in SQL with any appropriate
|
|
quotation characters, etc.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Implement DBSplitter over date/time values.
|
|
Make use of logic from IntegerSplitter, since date/time are just longs
|
|
in Java.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.DateSplitter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.DBConfiguration -->
|
|
<class name="DBConfiguration" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="DBConfiguration" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configureDB"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="driverClass" type="java.lang.String"/>
|
|
<param name="dbUrl" type="java.lang.String"/>
|
|
<param name="userName" type="java.lang.String"/>
|
|
<param name="passwd" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Sets the DB access related fields in the {@link Configuration}.
|
|
@param conf the configuration
|
|
@param driverClass JDBC Driver class name
|
|
@param dbUrl JDBC DB access URL.
|
|
@param userName DB access username
|
|
@param passwd DB access passwd]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configureDB"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="driverClass" type="java.lang.String"/>
|
|
<param name="dbUrl" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Sets the DB access related fields in the JobConf.
|
|
@param job the job
|
|
@param driverClass JDBC Driver class name
|
|
@param dbUrl JDBC DB access URL.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getConnection" return="java.sql.Connection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
<doc>
|
|
<![CDATA[Returns a connection object to the DB
|
|
@throws ClassNotFoundException
|
|
@throws SQLException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getInputTableName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setInputTableName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="tableName" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getInputFieldNames" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setInputFieldNames"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="fieldNames" type="java.lang.String[]"/>
|
|
</method>
|
|
<method name="getInputConditions" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setInputConditions"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conditions" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getInputOrderBy" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setInputOrderBy"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="orderby" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getInputQuery" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setInputQuery"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="query" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getInputCountQuery" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setInputCountQuery"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="query" type="java.lang.String"/>
|
|
</method>
|
|
<method name="setInputBoundingQuery"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="query" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getInputBoundingQuery" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getInputClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setInputClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="inputClass" type="java.lang.Class"/>
|
|
</method>
|
|
<method name="getOutputTableName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setOutputTableName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="tableName" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getOutputFieldNames" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setOutputFieldNames"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="fieldNames" type="java.lang.String[]"/>
|
|
</method>
|
|
<method name="setOutputFieldCount"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="fieldCount" type="int"/>
|
|
</method>
|
|
<method name="getOutputFieldCount" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<field name="DRIVER_CLASS_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The JDBC Driver class name]]>
|
|
</doc>
|
|
</field>
|
|
<field name="URL_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[JDBC Database access URL]]>
|
|
</doc>
|
|
</field>
|
|
<field name="USERNAME_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[User name to access the database]]>
|
|
</doc>
|
|
</field>
|
|
<field name="PASSWORD_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Password to access the database]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_TABLE_NAME_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Input table name]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_FIELD_NAMES_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Field names in the Input table]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_CONDITIONS_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[WHERE clause in the input SELECT statement]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_ORDER_BY_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[ORDER BY clause in the input SELECT statement]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_QUERY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Whole input query, excluding LIMIT...OFFSET]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_COUNT_QUERY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Input query to get the count of records]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_BOUNDING_QUERY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Input query to get the max and min values of the jdbc.input.query]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_CLASS_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Class name implementing DBWritable which will hold input tuples]]>
|
|
</doc>
|
|
</field>
|
|
<field name="OUTPUT_TABLE_NAME_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Output table name]]>
|
|
</doc>
|
|
</field>
|
|
<field name="OUTPUT_FIELD_NAMES_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Field names in the Output table]]>
|
|
</doc>
|
|
</field>
|
|
<field name="OUTPUT_FIELD_COUNT_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Number of fields in the Output table]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A container for configuration property names for jobs with DB input/output.
|
|
|
|
The job can be configured using the static methods in this class,
|
|
{@link DBInputFormat}, and {@link DBOutputFormat}.
|
|
Alternatively, the properties can be set in the configuration with proper
|
|
values.
|
|
|
|
@see DBConfiguration#configureDB(Configuration, String, String, String, String)
|
|
@see DBInputFormat#setInput(Job, Class, String, String)
|
|
@see DBInputFormat#setInput(Job, Class, String, String, String, String...)
|
|
@see DBOutputFormat#setOutput(Job, String, String...)]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.DBConfiguration -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.DBInputFormat -->
|
|
<class name="DBInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.conf.Configurable"/>
|
|
<constructor name="DBInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getDBConf" return="org.apache.hadoop.mapreduce.lib.db.DBConfiguration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getConnection" return="java.sql.Connection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getDBProductName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="createDBRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplits" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCountQuery" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the query for getting the total number of rows,
|
|
subclasses can override this for custom behaviour.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="inputClass" type="java.lang.Class"/>
|
|
<param name="tableName" type="java.lang.String"/>
|
|
<param name="conditions" type="java.lang.String"/>
|
|
<param name="orderBy" type="java.lang.String"/>
|
|
<param name="fieldNames" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[Initializes the map-part of the job with the appropriate input settings.
|
|
|
|
@param job The map-reduce job
|
|
@param inputClass the class object implementing DBWritable, which is the
|
|
Java object holding tuple fields.
|
|
@param tableName The table to read data from
|
|
@param conditions The condition which to select data with,
|
|
eg. '(updated > 20070101 AND length > 0)'
|
|
@param orderBy the fieldNames in the orderBy clause.
|
|
@param fieldNames The field names in the table
|
|
@see #setInput(Job, Class, String, String)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="inputClass" type="java.lang.Class"/>
|
|
<param name="inputQuery" type="java.lang.String"/>
|
|
<param name="inputCountQuery" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Initializes the map-part of the job with the appropriate input settings.
|
|
|
|
@param job The map-reduce job
|
|
@param inputClass the class object implementing DBWritable, which is the
|
|
Java object holding tuple fields.
|
|
@param inputQuery the input query to select fields. Example :
|
|
"SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
|
|
@param inputCountQuery the input query that returns
|
|
the number of records in the table.
|
|
Example : "SELECT COUNT(f1) FROM Mytable"
|
|
@see #setInput(Job, Class, String, String, String, String...)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="closeConnection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A InputFormat that reads input data from an SQL table.
|
|
<p>
|
|
DBInputFormat emits LongWritables containing the record number as
|
|
key and DBWritables as value.
|
|
|
|
The SQL query, and input class can be using one of the two
|
|
setInput methods.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.DBInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit -->
|
|
<class name="DBInputFormat.DBInputSplit" extends="org.apache.hadoop.mapreduce.InputSplit"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="DBInputFormat.DBInputSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Default Constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="DBInputFormat.DBInputSplit" type="long, long"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Convenience Constructor
|
|
@param start the index of the first row to select
|
|
@param end the index of the last row to select]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getStart" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return The index of the first row to select]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getEnd" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return The index of the last row to select]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@return The total row count in this split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="input" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="output" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A InputSplit that spans a set of rows]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.DBInputFormat.NullDBWritable -->
|
|
<class name="DBInputFormat.NullDBWritable" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.db.DBWritable"/>
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="DBInputFormat.NullDBWritable"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="arg0" type="java.sql.ResultSet"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="arg0" type="java.sql.PreparedStatement"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A Class that does nothing, implementing DBWritable]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.DBInputFormat.NullDBWritable -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.DBOutputFormat -->
|
|
<class name="DBOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="DBOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="constructQuery" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="table" type="java.lang.String"/>
|
|
<param name="fieldNames" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[Constructs the query used as the prepared statement to insert data.
|
|
|
|
@param table
|
|
the table to insert into
|
|
@param fieldNames
|
|
the fields to insert into. If field names are unknown, supply an
|
|
array of nulls.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="tableName" type="java.lang.String"/>
|
|
<param name="fieldNames" type="java.lang.String[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Initializes the reduce-part of the job with
|
|
the appropriate output settings
|
|
|
|
@param job The job
|
|
@param tableName The table to insert data into
|
|
@param fieldNames The field names in the table.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="tableName" type="java.lang.String"/>
|
|
<param name="fieldCount" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Initializes the reduce-part of the job
|
|
with the appropriate output settings
|
|
|
|
@param job The job
|
|
@param tableName The table to insert data into
|
|
@param fieldCount the number of fields in the table.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A OutputFormat that sends the reduce output to a SQL table.
|
|
<p>
|
|
{@link DBOutputFormat} accepts <key,value> pairs, where
|
|
key has a type extending DBWritable. Returned {@link RecordWriter}
|
|
writes <b>only the key</b> to the database with a batch SQL query.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.DBOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.DBOutputFormat.DBRecordWriter -->
|
|
<class name="DBOutputFormat.DBRecordWriter" extends="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="DBOutputFormat.DBRecordWriter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</constructor>
|
|
<constructor name="DBOutputFormat.DBRecordWriter" type="java.sql.Connection, java.sql.PreparedStatement"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</constructor>
|
|
<method name="getConnection" return="java.sql.Connection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getStatement" return="java.sql.PreparedStatement"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A RecordWriter that writes the reduce output to a SQL table]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.DBOutputFormat.DBRecordWriter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.DBRecordReader -->
|
|
<class name="DBRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="DBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
<doc>
|
|
<![CDATA[@param split The InputSplit to read data for
|
|
@throws SQLException]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="executeQuery" return="java.sql.ResultSet"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="query" type="java.lang.String"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</method>
|
|
<method name="getSelectQuery" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the query for selecting the records,
|
|
subclasses can override this for custom behaviour.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="initialize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getCurrentKey" return="org.apache.hadoop.io.LongWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentValue" return="T"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValue" return="T"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="deprecated, no comment">
|
|
<doc>
|
|
<![CDATA[@deprecated]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="deprecated, no comment">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@deprecated]]>
|
|
</doc>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link #nextKeyValue()}">
|
|
<param name="key" type="org.apache.hadoop.io.LongWritable"/>
|
|
<param name="value" type="T"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@deprecated Use {@link #nextKeyValue()}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplit" return="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getFieldNames" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getTableName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getConditions" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getDBConf" return="org.apache.hadoop.mapreduce.lib.db.DBConfiguration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getConnection" return="java.sql.Connection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getStatement" return="java.sql.PreparedStatement"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setStatement"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="stmt" type="java.sql.PreparedStatement"/>
|
|
</method>
|
|
<field name="statement" type="java.sql.PreparedStatement"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A RecordReader that reads records from a SQL table.
|
|
Emits LongWritables containing the record number as
|
|
key and DBWritables as value.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.DBRecordReader -->
|
|
<!-- start interface org.apache.hadoop.mapreduce.lib.db.DBSplitter -->
|
|
<interface name="DBSplitter" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="split" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="results" type="java.sql.ResultSet"/>
|
|
<param name="colName" type="java.lang.String"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
<doc>
|
|
<![CDATA[Given a ResultSet containing one record (and already advanced to that record)
|
|
with two columns (a low value, and a high value, both of the same type), determine
|
|
a set of splits that span the given values.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[DBSplitter will generate DBInputSplits to use with DataDrivenDBInputFormat.
|
|
DataDrivenDBInputFormat needs to interpolate between two values that
|
|
represent the lowest and highest valued records to import. Depending
|
|
on the data-type of the column, this requires different behavior.
|
|
DBSplitter implementations should perform this for a data type or family
|
|
of data types.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapreduce.lib.db.DBSplitter -->
|
|
<!-- start interface org.apache.hadoop.mapreduce.lib.db.DBWritable -->
|
|
<interface name="DBWritable" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="statement" type="java.sql.PreparedStatement"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
<doc>
|
|
<![CDATA[Sets the fields of the object in the {@link PreparedStatement}.
|
|
@param statement the statement that the fields are put into.
|
|
@throws SQLException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="resultSet" type="java.sql.ResultSet"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
<doc>
|
|
<![CDATA[Reads the fields of the object from the {@link ResultSet}.
|
|
@param resultSet the {@link ResultSet} to get the fields from.
|
|
@throws SQLException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Objects that are read from/written to a database should implement
|
|
<code>DBWritable</code>. DBWritable, is similar to {@link Writable}
|
|
except that the {@link #write(PreparedStatement)} method takes a
|
|
{@link PreparedStatement}, and {@link #readFields(ResultSet)}
|
|
takes a {@link ResultSet}.
|
|
<p>
|
|
Implementations are responsible for writing the fields of the object
|
|
to PreparedStatement, and reading the fields of the object from the
|
|
ResultSet.
|
|
|
|
<p>Example:</p>
|
|
If we have the following table in the database :
|
|
<pre>
|
|
CREATE TABLE MyTable (
|
|
counter INTEGER NOT NULL,
|
|
timestamp BIGINT NOT NULL,
|
|
);
|
|
</pre>
|
|
then we can read/write the tuples from/to the table with :
|
|
<p><pre>
|
|
public class MyWritable implements Writable, DBWritable {
|
|
// Some data
|
|
private int counter;
|
|
private long timestamp;
|
|
|
|
//Writable#write() implementation
|
|
public void write(DataOutput out) throws IOException {
|
|
out.writeInt(counter);
|
|
out.writeLong(timestamp);
|
|
}
|
|
|
|
//Writable#readFields() implementation
|
|
public void readFields(DataInput in) throws IOException {
|
|
counter = in.readInt();
|
|
timestamp = in.readLong();
|
|
}
|
|
|
|
public void write(PreparedStatement statement) throws SQLException {
|
|
statement.setInt(1, counter);
|
|
statement.setLong(2, timestamp);
|
|
}
|
|
|
|
public void readFields(ResultSet resultSet) throws SQLException {
|
|
counter = resultSet.getInt(1);
|
|
timestamp = resultSet.getLong(2);
|
|
}
|
|
}
|
|
</pre></p>]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapreduce.lib.db.DBWritable -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.FloatSplitter -->
|
|
<class name="FloatSplitter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/>
|
|
<constructor name="FloatSplitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="split" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="results" type="java.sql.ResultSet"/>
|
|
<param name="colName" type="java.lang.String"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Implement DBSplitter over floating-point values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.FloatSplitter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.IntegerSplitter -->
|
|
<class name="IntegerSplitter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/>
|
|
<constructor name="IntegerSplitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="split" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="results" type="java.sql.ResultSet"/>
|
|
<param name="colName" type="java.lang.String"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Implement DBSplitter over integer values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.IntegerSplitter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.MySQLDataDrivenDBRecordReader -->
|
|
<class name="MySQLDataDrivenDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MySQLDataDrivenDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</constructor>
|
|
<method name="executeQuery" return="java.sql.ResultSet"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="query" type="java.lang.String"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A RecordReader that reads records from a MySQL table via DataDrivenDBRecordReader]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.MySQLDataDrivenDBRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.MySQLDBRecordReader -->
|
|
<class name="MySQLDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DBRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MySQLDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</constructor>
|
|
<method name="executeQuery" return="java.sql.ResultSet"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="query" type="java.lang.String"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A RecordReader that reads records from a MySQL table.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.MySQLDBRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBInputFormat -->
|
|
<class name="OracleDataDrivenDBInputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.conf.Configurable"/>
|
|
<constructor name="OracleDataDrivenDBInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getSplitter" return="org.apache.hadoop.mapreduce.lib.db.DBSplitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="sqlDataType" type="int"/>
|
|
<doc>
|
|
<![CDATA[@return the DBSplitter implementation to use to divide the table/query into InputSplits.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createDBRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An InputFormat that reads input data from an SQL table in an Oracle db.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBRecordReader -->
|
|
<class name="OracleDataDrivenDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="OracleDataDrivenDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[A RecordReader that reads records from an Oracle table via DataDrivenDBRecordReader]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDateSplitter -->
|
|
<class name="OracleDateSplitter" extends="org.apache.hadoop.mapreduce.lib.db.DateSplitter"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="OracleDateSplitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="dateToString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="d" type="java.util.Date"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Implement DBSplitter over date/time values returned by an Oracle db.
|
|
Make use of logic from DateSplitter, since this just needs to use
|
|
some Oracle-specific functions on the formatting end when generating
|
|
InputSplits.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDateSplitter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDBRecordReader -->
|
|
<class name="OracleDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DBRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="OracleDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</constructor>
|
|
<method name="getSelectQuery" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the query for selecting the records from an Oracle DB.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSessionTimeZone"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="conn" type="java.sql.Connection"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
<doc>
|
|
<![CDATA[Set session time zone
|
|
@param conf The current configuration.
|
|
We read the 'oracle.sessionTimeZone' property from here.
|
|
@param conn The connection to alter the timezone properties of.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="SESSION_TIMEZONE_KEY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Configuration key to set to a timezone string.]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A RecordReader that reads records from an Oracle SQL table.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDBRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.db.TextSplitter -->
|
|
<class name="TextSplitter" extends="org.apache.hadoop.mapreduce.lib.db.BigDecimalSplitter"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TextSplitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="split" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="results" type="java.sql.ResultSet"/>
|
|
<param name="colName" type="java.lang.String"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
<doc>
|
|
<![CDATA[This method needs to determine the splits between two user-provided strings.
|
|
In the case where the user's strings are 'A' and 'Z', this is not hard; we
|
|
could create two splits from ['A', 'M') and ['M', 'Z'], 26 splits for strings
|
|
beginning with each letter, etc.
|
|
|
|
If a user has provided us with the strings "Ham" and "Haze", however, we need
|
|
to create splits that differ in the third letter.
|
|
|
|
The algorithm used is as follows:
|
|
Since there are 2**16 unicode characters, we interpret characters as digits in
|
|
base 65536. Given a string 's' containing characters s_0, s_1 .. s_n, we interpret
|
|
the string as the number: 0.s_0 s_1 s_2.. s_n in base 65536. Having mapped the
|
|
low and high strings into floating-point values, we then use the BigDecimalSplitter
|
|
to establish the even split points, then map the resulting floating point values
|
|
back into strings.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Implement DBSplitter over text strings.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.db.TextSplitter -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.fieldsel">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionHelper -->
|
|
<class name="FieldSelectionHelper" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FieldSelectionHelper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="FieldSelectionHelper" type="org.apache.hadoop.io.Text, org.apache.hadoop.io.Text"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="parseOutputKeyValueSpec" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="keyValueSpec" type="java.lang.String"/>
|
|
<param name="keyFieldList" type="java.util.List"/>
|
|
<param name="valueFieldList" type="java.util.List"/>
|
|
</method>
|
|
<method name="specToString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="fieldSeparator" type="java.lang.String"/>
|
|
<param name="keyValueSpec" type="java.lang.String"/>
|
|
<param name="allValueFieldsFrom" type="int"/>
|
|
<param name="keyFieldList" type="java.util.List"/>
|
|
<param name="valueFieldList" type="java.util.List"/>
|
|
</method>
|
|
<method name="getKey" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getValue" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="extractOutputKeyValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.String"/>
|
|
<param name="val" type="java.lang.String"/>
|
|
<param name="fieldSep" type="java.lang.String"/>
|
|
<param name="keyFieldList" type="java.util.List"/>
|
|
<param name="valFieldList" type="java.util.List"/>
|
|
<param name="allValueFieldsFrom" type="int"/>
|
|
<param name="ignoreKey" type="boolean"/>
|
|
<param name="isMap" type="boolean"/>
|
|
</method>
|
|
<field name="emptyText" type="org.apache.hadoop.io.Text"
|
|
transient="false" volatile="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="DATA_FIELD_SEPERATOR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="MAP_OUTPUT_KEY_VALUE_SPEC" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="REDUCE_OUTPUT_KEY_VALUE_SPEC" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class implements a mapper/reducer class that can be used to perform
|
|
field selections in a manner similar to unix cut. The input data is treated
|
|
as fields separated by a user specified separator (the default value is
|
|
"\t"). The user can specify a list of fields that form the map output keys,
|
|
and a list of fields that form the map output values. If the inputformat is
|
|
TextInputFormat, the mapper will ignore the key to the map function, and the
|
|
fields are from the value only. Otherwise, the fields are the union of those
|
|
from the key and those from the value.
|
|
|
|
The field separator is under attribute "mapreduce.fieldsel.data.field.separator"
|
|
|
|
The map output field list spec is under attribute
|
|
"mapreduce.fieldsel.map.output.key.value.fields.spec".
|
|
The value is expected to be like "keyFieldsSpec:valueFieldsSpec"
|
|
key/valueFieldsSpec are comma (,) separated field spec: fieldSpec,fieldSpec,fieldSpec ...
|
|
Each field spec can be a simple number (e.g. 5) specifying a specific field, or a range
|
|
(like 2-5) to specify a range of fields, or an open range (like 3-) specifying all
|
|
the fields starting from field 3. The open range field spec applies to value fields only.
|
|
They have no effect on the key fields.
|
|
|
|
Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields 4,3,0 and 1 for keys,
|
|
and use fields 6,5,1,2,3,7 and above for values.
|
|
|
|
The reduce output field list spec is under attribute
|
|
"mapreduce.fieldsel.reduce.output.key.value.fields.spec".
|
|
|
|
The reducer extracts output key/value pairs in a similar manner, except that
|
|
the key is never ignored.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionHelper -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionMapper -->
|
|
<class name="FieldSelectionMapper" extends="org.apache.hadoop.mapreduce.Mapper"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FieldSelectionMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="val" type="V"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[The identity function. Input key/value pair is written directly to output.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class implements a mapper class that can be used to perform
|
|
field selections in a manner similar to unix cut. The input data is treated
|
|
as fields separated by a user specified separator (the default value is
|
|
"\t"). The user can specify a list of fields that form the map output keys,
|
|
and a list of fields that form the map output values. If the inputformat is
|
|
TextInputFormat, the mapper will ignore the key to the map function, and the
|
|
fields are from the value only. Otherwise, the fields are the union of those
|
|
from the key and those from the value.
|
|
|
|
The field separator is under attribute "mapreduce.fieldsel.data.field.separator"
|
|
|
|
The map output field list spec is under attribute
|
|
"mapreduce.fieldsel.map.output.key.value.fields.spec".
|
|
The value is expected to be like
|
|
"keyFieldsSpec:valueFieldsSpec" key/valueFieldsSpec are comma (,) separated
|
|
field spec: fieldSpec,fieldSpec,fieldSpec ... Each field spec can be a
|
|
simple number (e.g. 5) specifying a specific field, or a range (like 2-5)
|
|
to specify a range of fields, or an open range (like 3-) specifying all
|
|
the fields starting from field 3. The open range field spec applies to value
|
|
fields only. They have no effect on the key fields.
|
|
|
|
Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields
|
|
4,3,0 and 1 for keys, and use fields 6,5,1,2,3,7 and above for values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionMapper -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionReducer -->
|
|
<class name="FieldSelectionReducer" extends="org.apache.hadoop.mapreduce.Reducer"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FieldSelectionReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.Text"/>
|
|
<param name="values" type="java.lang.Iterable"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class implements a reducer class that can be used to perform field
|
|
selections in a manner similar to unix cut.
|
|
|
|
The input data is treated as fields separated by a user specified
|
|
separator (the default value is "\t"). The user can specify a list of
|
|
fields that form the reduce output keys, and a list of fields that form
|
|
the reduce output values. The fields are the union of those from the key
|
|
and those from the value.
|
|
|
|
The field separator is under attribute "mapreduce.fieldsel.data.field.separator"
|
|
|
|
The reduce output field list spec is under attribute
|
|
"mapreduce.fieldsel.reduce.output.key.value.fields.spec".
|
|
The value is expected to be like
|
|
"keyFieldsSpec:valueFieldsSpec" key/valueFieldsSpec are comma (,)
|
|
separated field spec: fieldSpec,fieldSpec,fieldSpec ... Each field spec
|
|
can be a simple number (e.g. 5) specifying a specific field, or a range
|
|
(like 2-5) to specify a range of fields, or an open range (like 3-)
|
|
specifying all the fields starting from field 3. The open range field
|
|
spec applies to value fields only. They have no effect on the key fields.
|
|
|
|
Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields
|
|
4,3,0 and 1 for keys, and use fields 6,5,1,2,3,7 and above for values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionReducer -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.input">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat -->
|
|
<class name="CombineFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="CombineFileInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="setMaxSplitSize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="maxSplitSize" type="long"/>
|
|
<doc>
|
|
<![CDATA[Specify the maximum size (in bytes) of each split. Each split is
|
|
approximately equal to the specified size.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMinSplitSizeNode"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="minSplitSizeNode" type="long"/>
|
|
<doc>
|
|
<![CDATA[Specify the minimum size (in bytes) of each split per node.
|
|
This applies to data that is left over after combining data on a single
|
|
node into splits that are of maximum size specified by maxSplitSize.
|
|
This leftover data will be combined into its own split if its size
|
|
exceeds minSplitSizeNode.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMinSplitSizeRack"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="minSplitSizeRack" type="long"/>
|
|
<doc>
|
|
<![CDATA[Specify the minimum size (in bytes) of each split per rack.
|
|
This applies to data that is left over after combining data on a single
|
|
rack into splits that are of maximum size specified by maxSplitSize.
|
|
This leftover data will be combined into its own split if its size
|
|
exceeds minSplitSizeRack.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createPool"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="filters" type="java.util.List"/>
|
|
<doc>
|
|
<![CDATA[Create a new pool and add the filters to it.
|
|
A split cannot have files from different pools.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createPool"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="filters" type="org.apache.hadoop.fs.PathFilter[]"/>
|
|
<doc>
|
|
<![CDATA[Create a new pool and add the filters to it.
|
|
A pathname can satisfy any one of the specified filters.
|
|
A split cannot have files from different pools.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplits" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This is not implemented yet.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="SPLIT_MINSIZE_PERNODE" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="SPLIT_MINSIZE_PERRACK" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[An abstract {@link InputFormat} that returns {@link CombineFileSplit}'s in
|
|
{@link InputFormat#getSplits(JobContext)} method.
|
|
|
|
Splits are constructed from the files under the input paths.
|
|
A split cannot have files from different pools.
|
|
Each split returned may contain blocks from different files.
|
|
If a maxSplitSize is specified, then blocks on the same node are
|
|
combined to form a single split. Blocks that are left over are
|
|
then combined with other blocks in the same rack.
|
|
If maxSplitSize is not specified, then blocks from the same rack
|
|
are combined in a single split; no attempt is made to create
|
|
node-local splits.
|
|
If the maxSplitSize is equal to the block size, then this class
|
|
is similar to the default splitting behavior in Hadoop: each
|
|
block is a locally processed split.
|
|
Subclasses implement
|
|
{@link InputFormat#createRecordReader(InputSplit, TaskAttemptContext)}
|
|
to construct <code>RecordReader</code>'s for
|
|
<code>CombineFileSplit</code>'s.
|
|
|
|
@see CombineFileSplit]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader -->
|
|
<class name="CombineFileRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="CombineFileRecordReader" type="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit, org.apache.hadoop.mapreduce.TaskAttemptContext, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[A generic RecordReader that can hand out different recordReaders
|
|
for each chunk in the CombineFileSplit.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="initialize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getCurrentKey" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getCurrentValue" return="V"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[return progress based on the amount of data processed so far.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="initNextRecordReader" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the record reader for the next chunk in this CombineFileSplit.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="split" type="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="rrClass" type="java.lang.Class"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="rrConstructor" type="java.lang.reflect.Constructor"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="fs" type="org.apache.hadoop.fs.FileSystem"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="idx" type="int"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="progress" type="long"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="curReader" type="org.apache.hadoop.mapreduce.RecordReader"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A generic RecordReader that can hand out different recordReaders
|
|
for each chunk in a {@link CombineFileSplit}.
|
|
A CombineFileSplit can combine data chunks from multiple files.
|
|
This class allows using different RecordReaders for processing
|
|
these data chunks from different files.
|
|
@see CombineFileSplit]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.CombineFileSplit -->
|
|
<class name="CombineFileSplit" extends="org.apache.hadoop.mapreduce.InputSplit"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="CombineFileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="CombineFileSplit" type="org.apache.hadoop.fs.Path[], long[], long[], java.lang.String[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="CombineFileSplit" type="org.apache.hadoop.fs.Path[], long[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="CombineFileSplit" type="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Copy constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getStartOffsets" return="long[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns an array containing the start offsets of the files in the split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLengths" return="long[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns an array containing the lengths of the files in the split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOffset" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<doc>
|
|
<![CDATA[Returns the start offset of the i<sup>th</sup> Path]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<doc>
|
|
<![CDATA[Returns the length of the i<sup>th</sup> Path]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumPaths" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the number of Paths in the split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<doc>
|
|
<![CDATA[Returns the i<sup>th</sup> Path]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPaths" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns all the Paths in the split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Returns all the locations where this input-split resides]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A sub-collection of input files.
|
|
|
|
Unlike {@link FileSplit}, CombineFileSplit class does not represent
|
|
a split of a file, but a split of input files into smaller sets.
|
|
A split may contain blocks from different files but all
|
|
the blocks in the same split are probably local to some rack <br>
|
|
CombineFileSplit can be used to implement {@link RecordReader}'s,
|
|
each reading one record per file.
|
|
|
|
@see FileSplit
|
|
@see CombineFileInputFormat]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.CombineFileSplit -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.FileInputFormat -->
|
|
<class name="FileInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FileInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getFormatMinSplitSize" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the lower bound on split size imposed by the format.
|
|
@return the number of bytes of the minimal split for this format]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isSplitable" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<param name="filename" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Is the given filename splitable? Usually, true, but if the file is
|
|
stream compressed, it will not be.
|
|
|
|
<code>FileInputFormat</code> implementations can override this and return
|
|
<code>false</code> to ensure that individual input files are never split-up
|
|
so that {@link Mapper}s process entire files.
|
|
|
|
@param context the job context
|
|
@param filename the file name to check
|
|
@return is this file splitable?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInputPathFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="filter" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job.
|
|
@param job the job to modify
|
|
@param filter the PathFilter class to use for filtering the input paths.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMinInputSplitSize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="size" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the minimum input split size
|
|
@param job the job to modify
|
|
@param size the minimum size]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMinSplitSize" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the minimum split size
|
|
@param job the job
|
|
@return the minimum number of bytes that can be in a split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxInputSplitSize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="size" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the maximum split size
|
|
@param job the job to modify
|
|
@param size the maximum split size]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxSplitSize" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the maximum split size.
|
|
@param context the job to look at.
|
|
@return the maximum number of bytes a split can include]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get a PathFilter instance of the filter set for the input paths.
|
|
|
|
@return the PathFilter instance set for the job, NULL if none has been set.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="listStatus" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[List input directories.
|
|
Subclasses may override to, e.g., select only files matching a regular
|
|
expression.
|
|
|
|
@param job the job to list input paths for
|
|
@return list of FileStatus objects
|
|
@throws IOException if zero items.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="makeSplit" return="org.apache.hadoop.mapreduce.lib.input.FileSplit"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="file" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="start" type="long"/>
|
|
<param name="length" type="long"/>
|
|
<param name="hosts" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[A factory that makes the split for this class. It can be overridden
|
|
by sub-classes to make sub-types]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplits" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Generate the list of files and make them into FileSplits.
|
|
@param job the job context
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="computeSplitSize" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="blockSize" type="long"/>
|
|
<param name="minSize" type="long"/>
|
|
<param name="maxSize" type="long"/>
|
|
</method>
|
|
<method name="getBlockIndex" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/>
|
|
<param name="offset" type="long"/>
|
|
</method>
|
|
<method name="setInputPaths"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="commaSeparatedPaths" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Sets the given comma separated paths as the list of inputs
|
|
for the map-reduce job.
|
|
|
|
@param job the job
|
|
@param commaSeparatedPaths Comma separated paths to be set as
|
|
the list of inputs for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addInputPaths"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="commaSeparatedPaths" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add the given comma separated paths to the list of inputs for
|
|
the map-reduce job.
|
|
|
|
@param job The job to modify
|
|
@param commaSeparatedPaths Comma separated paths to be added to
|
|
the list of inputs for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInputPaths"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Set the array of {@link Path}s as the list of inputs
|
|
for the map-reduce job.
|
|
|
|
@param job The job to modify
|
|
@param inputPaths the {@link Path}s of the input directories/files
|
|
for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addInputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job.
|
|
|
|
@param job The {@link Job} to modify
|
|
@param path {@link Path} to be added to the list of inputs for
|
|
the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputPaths" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the list of input {@link Path}s for the map-reduce job.
|
|
|
|
@param context The job
|
|
@return the list of input {@link Path}s for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="COUNTER_GROUP" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="BYTES_READ" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="INPUT_DIR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="SPLIT_MAXSIZE" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="SPLIT_MINSIZE" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="PATHFILTER_CLASS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="NUM_INPUT_FILES" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A base class for file-based {@link InputFormat}s.
|
|
|
|
<p><code>FileInputFormat</code> is the base class for all file-based
|
|
<code>InputFormat</code>s. This provides a generic implementation of
|
|
{@link #getSplits(JobContext)}.
|
|
Subclasses of <code>FileInputFormat</code> can also override the
|
|
{@link #isSplitable(JobContext, Path)} method to ensure input-files are
|
|
not split-up and are processed as a whole by {@link Mapper}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.FileInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.FileSplit -->
|
|
<class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="FileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a split with host information
|
|
|
|
@param file the file name
|
|
@param start the position of the first byte in the file to process
|
|
@param length the number of bytes in the file to process
|
|
@param hosts the list of hosts containing the block, possibly null]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The file containing this split's data.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getStart" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The position of the first byte in the file to process.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The number of bytes in the file to process.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A section of an input file. Returned by {@link
|
|
InputFormat#getSplits(JobContext)} and passed to
|
|
{@link InputFormat#createRecordReader(InputSplit,TaskAttemptContext)}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.FileSplit -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.InvalidInputException -->
|
|
<class name="InvalidInputException" extends="java.io.IOException"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InvalidInputException" type="java.util.List"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create the exception with the given list.
|
|
@param probs the list of problems to report. This list is not copied.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getProblems" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the complete list of the problems reported.
|
|
@return the list of problems, which must not be modified]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMessage" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get a summary message of the problems found.
|
|
@return the concatenated messages from all of the problems.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class wraps a list of problems with the input, so that the user
|
|
can get a list of problems together instead of finding and fixing them one
|
|
by one.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.InvalidInputException -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader -->
|
|
<class name="KeyValueLineRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="KeyValueLineRecordReader" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="initialize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericSplit" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="findSeparator" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="utf" type="byte[]"/>
|
|
<param name="start" type="int"/>
|
|
<param name="length" type="int"/>
|
|
<param name="sep" type="byte"/>
|
|
</method>
|
|
<method name="setKeyValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.Text"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<param name="line" type="byte[]"/>
|
|
<param name="lineLen" type="int"/>
|
|
<param name="pos" type="int"/>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read key/value pair in a line.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentKey" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getCurrentValue" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="KEY_VALUE_SEPERATOR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class treats a line in the input as a key/value pair separated by a
|
|
separator character. The separator can be specified in config file
|
|
under the attribute name mapreduce.input.keyvaluelinerecordreader.key.value.separator. The default
|
|
separator is the tab character ('\t').]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat -->
|
|
<class name="KeyValueTextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="KeyValueTextInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="isSplitable" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<param name="file" type="org.apache.hadoop.fs.Path"/>
|
|
</method>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericSplit" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines.
|
|
Either a line feed or a carriage return is used to signal the end of a line.
|
|
Each line is divided into key and value parts by a separator byte. If no
|
|
such byte exists, the key will be the entire line and the value will be empty.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.MultipleInputs -->
|
|
<class name="MultipleInputs" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultipleInputs"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="addInputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="inputFormatClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Add a {@link Path} with a custom {@link InputFormat} to the list of
|
|
inputs for the map-reduce job.
|
|
|
|
@param job The {@link Job}
|
|
@param path {@link Path} to be added to the list of inputs for the job
|
|
@param inputFormatClass {@link InputFormat} class to use for this path]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addInputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="inputFormatClass" type="java.lang.Class"/>
|
|
<param name="mapperClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Add a {@link Path} with a custom {@link InputFormat} and
|
|
{@link Mapper} to the list of inputs for the map-reduce job.
|
|
|
|
@param job The {@link Job}
|
|
@param path {@link Path} to be added to the list of inputs for the job
|
|
@param inputFormatClass {@link InputFormat} class to use for this path
|
|
@param mapperClass {@link Mapper} class to use for this path]]>
|
|
</doc>
|
|
</method>
|
|
<field name="DIR_FORMATS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="DIR_MAPPERS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class supports MapReduce jobs that have multiple input paths with
|
|
a different {@link InputFormat} and {@link Mapper} for each path]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.MultipleInputs -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.NLineInputFormat -->
|
|
<class name="NLineInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="NLineInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericSplit" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getSplits" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Logically splits the set of input files for the job, splits N lines
|
|
of the input as one split.
|
|
|
|
@see FileInputFormat#getSplits(JobContext)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplitsForFile" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="status" type="org.apache.hadoop.fs.FileStatus"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="numLinesPerSplit" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setNumLinesPerSplit"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="numLines" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the number of lines per split
|
|
@param job the job to modify
|
|
@param numLines the number of lines per split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumLinesPerSplit" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the number of lines per split
|
|
@param job the job
|
|
@return the number of lines per split]]>
|
|
</doc>
|
|
</method>
|
|
<field name="LINES_PER_MAP" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[NLineInputFormat which splits N lines of input as one split.
|
|
|
|
In many "pleasantly" parallel applications, each process/mapper
|
|
processes the same input file(s), but the computations are
|
|
controlled by different parameters (referred to as "parameter sweeps").
|
|
One way to achieve this, is to specify a set of parameters
|
|
(one set per line) as input in a control file
|
|
(which is the input path to the map-reduce application,
|
|
whereas the input dataset is specified
|
|
via a config variable in JobConf).
|
|
|
|
The NLineInputFormat can be used in such applications; it splits
|
|
the input file such that by default, one line is fed as
|
|
a value to one map task, and key is the offset.
|
|
i.e. (k,v) is (LongWritable, Text).
|
|
The location hints will span the whole mapred cluster.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.NLineInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat -->
|
|
<class name="SequenceFileAsBinaryInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileAsBinaryInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[InputFormat reading keys, values from SequenceFiles in binary (raw)
|
|
format.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader -->
|
|
<class name="SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="initialize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getCurrentKey" return="org.apache.hadoop.io.BytesWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getCurrentValue" return="org.apache.hadoop.io.BytesWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getKeyClassName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Retrieve the name of the key class for this SequenceFile.
|
|
@see org.apache.hadoop.io.SequenceFile.Reader#getKeyClassName]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getValueClassName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Retrieve the name of the value class for this SequenceFile.
|
|
@see org.apache.hadoop.io.SequenceFile.Reader#getValueClassName]]>
|
|
</doc>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Read raw bytes from a SequenceFile.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Return the progress within the input split
|
|
@return 0.0 to 1.0 of the input byte range]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Read records from a SequenceFile as binary (raw) bytes.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat -->
|
|
<class name="SequenceFileAsTextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileAsTextInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class is similar to SequenceFileInputFormat, except it generates
|
|
SequenceFileAsTextRecordReader which converts the input keys and values
|
|
to their String forms by calling toString() method.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextRecordReader -->
|
|
<class name="SequenceFileAsTextRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileAsTextRecordReader"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="initialize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getCurrentKey" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getCurrentValue" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Read key/value pair in a line.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class converts the input keys and values to their String forms by
|
|
calling toString() method. This class to SequenceFileAsTextInputFormat
|
|
class is as LineRecordReader class to TextInputFormat class.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter -->
|
|
<class name="SequenceFileInputFilter" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFilter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create a record reader for the given split
|
|
@param split file split
|
|
@param context the task-attempt context
|
|
@return RecordReader]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setFilterClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="filterClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[set the filter class
|
|
|
|
@param job The job
|
|
@param filterClass filter class]]>
|
|
</doc>
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FILTER_CLASS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FILTER_FREQUENCY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FILTER_REGEX" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A class that allows a map/red job to work on a sample of sequence files.
|
|
The sample is decided by the filter class set by the job.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter -->
|
|
<!-- start interface org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.Filter -->
|
|
<interface name="SequenceFileInputFilter.Filter" abstract="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.conf.Configurable"/>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[filter function
|
|
Decide if a record should be filtered or not
|
|
@param key record key
|
|
@return true if a record is accepted; return false otherwise]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[filter interface]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.Filter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.FilterBase -->
|
|
<class name="SequenceFileInputFilter.FilterBase" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.Filter"/>
|
|
<constructor name="SequenceFileInputFilter.FilterBase"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[base class for Filters]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.FilterBase -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.MD5Filter -->
|
|
<class name="SequenceFileInputFilter.MD5Filter" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.FilterBase"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFilter.MD5Filter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setFrequency"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="frequency" type="int"/>
|
|
<doc>
|
|
<![CDATA[set the filtering frequency in configuration
|
|
|
|
@param conf configuration
|
|
@param frequency filtering frequency]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[configure the filter according to configuration
|
|
|
|
@param conf configuration]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Filtering method
|
|
If MD5(key) % frequency==0, return true; otherwise return false
|
|
@see Filter#accept(Object)]]>
|
|
</doc>
|
|
</method>
|
|
<field name="MD5_LEN" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class returns a set of records by examining the MD5 digest of its
|
|
key against a filtering frequency <i>f</i>. The filtering criterion is
|
|
MD5(key) % f == 0.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.MD5Filter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.PercentFilter -->
|
|
<class name="SequenceFileInputFilter.PercentFilter" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.FilterBase"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFilter.PercentFilter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setFrequency"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="frequency" type="int"/>
|
|
<doc>
|
|
<![CDATA[set the frequency and stores it in conf
|
|
@param conf configuration
|
|
@param frequency filtering frequency]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[configure the filter by checking the configuration
|
|
|
|
@param conf configuration]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Filtering method
|
|
If record# % frequency==0, return true; otherwise return false
|
|
@see Filter#accept(Object)]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class returns a percentage of records
|
|
The percentage is determined by a filtering frequency <i>f</i> using
|
|
the criteria record# % f == 0.
|
|
For example, if the frequency is 10, one out of 10 records is returned.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.PercentFilter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.RegexFilter -->
|
|
<class name="SequenceFileInputFilter.RegexFilter" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.FilterBase"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFilter.RegexFilter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setPattern"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="regex" type="java.lang.String"/>
|
|
<exception name="PatternSyntaxException" type="java.util.regex.PatternSyntaxException"/>
|
|
<doc>
|
|
<![CDATA[Define the filtering regex and stores it in conf
|
|
@param conf where the regex is set
|
|
@param regex regex used as a filter]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[configure the Filter by checking the configuration]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Filtering method
|
|
If key matches the regex, return true; otherwise return false
|
|
@see Filter#accept(Object)]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Records filter by matching key to regex]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.RegexFilter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat -->
|
|
<class name="SequenceFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getFormatMinSplitSize" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="listStatus" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link InputFormat} for {@link SequenceFile}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader -->
|
|
<class name="SequenceFileRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileRecordReader"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="initialize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getCurrentKey" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getCurrentValue" return="V"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Return the progress within the input split
|
|
@return 0.0 to 1.0 of the input byte range]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="conf" type="org.apache.hadoop.conf.Configuration"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A {@link RecordReader} for {@link SequenceFile}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.TextInputFormat -->
|
|
<class name="TextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TextInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
</method>
|
|
<method name="isSplitable" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<param name="file" type="org.apache.hadoop.fs.Path"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines.
|
|
Either linefeed or carriage-return are used to signal end of line. Keys are
|
|
the position in the file, and values are the line of text.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.TextInputFormat -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.jobcontrol">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob -->
|
|
<class name="ControlledJob" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ControlledJob" type="org.apache.hadoop.mapreduce.Job, java.util.List"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Construct a job.
|
|
@param job a mapreduce job to be executed.
|
|
@param dependingJobs a list of jobs the current job depends on]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="ControlledJob" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Construct a job.
|
|
|
|
@param conf mapred job configuration representing a job to be executed.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the job name of this job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the job name for this job.
|
|
@param jobName the job name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobID" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the job ID of this job assigned by JobControl]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="id" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the job ID for this job.
|
|
@param id the job ID]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapredJobID" return="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the mapred ID of this job as assigned by the
|
|
mapred framework.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJob" return="org.apache.hadoop.mapreduce.Job"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the mapreduce job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJob"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<doc>
|
|
<![CDATA[Set the mapreduce job
|
|
@param job the mapreduce job for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobState" return="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the state of this job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobState"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="state" type="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State"/>
|
|
<doc>
|
|
<![CDATA[Set the state for this job.
|
|
@param state the new state for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMessage" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the message of this job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMessage"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="message" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the message for this job.
|
|
@param message the message for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDependentJobs" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the depending jobs of this job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addDependingJob" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="dependingJob" type="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob"/>
|
|
<doc>
|
|
<![CDATA[Add a job to this job's dependency list.
|
|
Dependent jobs can only be added while a Job
|
|
is waiting to run, not during or afterwards.
|
|
|
|
@param dependingJob Job that this Job depends on.
|
|
@return <tt>true</tt> if the Job was added.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isCompleted" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return true if this job is in a complete state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isReady" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return true if this job is in READY state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="killJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="submit"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Submit this job to mapred. The state becomes RUNNING if submission
|
|
is successful, FAILED otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="CREATE_DIR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class encapsulates a MapReduce job and its dependencies. It monitors
|
|
the states of the depending jobs and updates the state of this job.
|
|
A job starts in the WAITING state. If it does not have any depending jobs,
|
|
or all of the depending jobs are in SUCCESS state, then the job state
|
|
will become READY. If any depending jobs fail, the job will fail too.
|
|
When in READY state, the job can be submitted to Hadoop for execution, with
|
|
the state changing into RUNNING state. From RUNNING state, the job
|
|
can get into SUCCESS or FAILED state, depending on
|
|
the status of the job execution.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State -->
|
|
<class name="ControlledJob.State" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl -->
|
|
<class name="JobControl" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.lang.Runnable"/>
|
|
<constructor name="JobControl" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a job control for a group of jobs.
|
|
@param groupName a name identifying this group]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getWaitingJobList" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the jobs in the waiting state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRunningJobList" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the jobs in the running state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReadyJobsList" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the jobs in the ready state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSuccessfulJobList" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the jobs in the success state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getFailedJobList" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="addJob" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="aJob" type="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob"/>
|
|
<doc>
|
|
<![CDATA[Add a new job.
|
|
@param aJob the new job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addJobCollection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobs" type="java.util.Collection"/>
|
|
<doc>
|
|
<![CDATA[Add a collection of jobs
|
|
|
|
@param jobs the collection of jobs to add]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getThreadState" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.ThreadState"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the thread state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="stop"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Set the thread state to STOPPING so that the
|
|
thread will stop when it wakes up.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="suspend"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Suspend the running thread.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="resume"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Resume the suspended thread.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="allFinished" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The main loop for the thread.
|
|
The loop does the following:
|
|
Check the states of the running jobs
|
|
Update the states of waiting jobs
|
|
Submit the jobs in ready state]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class encapsulates a set of MapReduce jobs and their dependencies.
|
|
|
|
It tracks the states of the jobs by placing them into different tables
|
|
according to their states.
|
|
|
|
This class provides APIs for the client app to add a job to the group
|
|
and to get the jobs in the group in different states. When a job is
|
|
added, an ID unique to the group is assigned to the job.
|
|
|
|
This class has a thread that submits jobs when they become ready,
|
|
monitors the states of the running jobs, and updates the states of jobs
|
|
based on the state changes of the jobs they depend on. The class
|
|
provides APIs for suspending/resuming the thread, and
|
|
for stopping the thread.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.ThreadState -->
|
|
<class name="JobControl.ThreadState" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.ThreadState[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.ThreadState"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.ThreadState -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.join">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.ArrayListBackedIterator -->
|
|
<class name="ArrayListBackedIterator" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/>
|
|
<constructor name="ArrayListBackedIterator"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="ArrayListBackedIterator" type="java.util.ArrayList"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="X"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="X"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="item" type="X"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class provides an implementation of ResetableIterator. The
|
|
implementation uses an {@link java.util.ArrayList} to store elements
|
|
added to it, replaying them as requested.
|
|
Prefer {@link StreamBackedIterator}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.ArrayListBackedIterator -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.ComposableInputFormat -->
|
|
<class name="ComposableInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ComposableInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Refinement of InputFormat requiring implementors to provide
|
|
ComposableRecordReader instead of RecordReader.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.ComposableInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader -->
|
|
<class name="ComposableRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.lang.Comparable"/>
|
|
<constructor name="ComposableRecordReader"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[Additional operations required of a RecordReader to participate in a join.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.CompositeInputFormat -->
|
|
<class name="CompositeInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="CompositeInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setFormat"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Interpret a given string as a composite expression.
|
|
{@code
|
|
func ::= <ident>([<func>,]*<func>)
|
|
func ::= tbl(<class>,"<path>")
|
|
class ::= @see java.lang.Class#forName(java.lang.String)
|
|
path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String)
|
|
}
|
|
Reads expression from the <tt>mapreduce.join.expr</tt> property and
|
|
user-supplied join types from <tt>mapreduce.join.define.&lt;ident&gt;</tt>
|
|
types. Paths supplied to <tt>tbl</tt> are given as input paths to the
|
|
InputFormat class listed.
|
|
@see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addDefaults"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Adds the default set of identifiers to the parser.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplits" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Build a CompositeInputSplit from the child InputFormats by assigning the
|
|
ith split from each child to the ith composite split.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Construct a CompositeRecordReader for the children of this InputFormat
|
|
as defined in the init expression.
|
|
The outermost join need only be composable, not necessarily a composite.
|
|
Mandating TupleWritable isn't strictly correct.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compose" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="inf" type="java.lang.Class"/>
|
|
<param name="path" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Convenience method for constructing composite formats.
|
|
Given InputFormat class (inf), path (p) return:
|
|
{@code tbl(<inf>, <p>) }]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compose" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="op" type="java.lang.String"/>
|
|
<param name="inf" type="java.lang.Class"/>
|
|
<param name="path" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[Convenience method for constructing composite formats.
|
|
Given operation (op), Object class (inf), set of paths (p) return:
|
|
{@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compose" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="op" type="java.lang.String"/>
|
|
<param name="inf" type="java.lang.Class"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path[]"/>
|
|
<doc>
|
|
<![CDATA[Convenience method for constructing composite formats.
|
|
Given operation (op), Object class (inf), set of paths (p) return:
|
|
{@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]>
|
|
</doc>
|
|
</method>
|
|
<field name="JOIN_EXPR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="JOIN_COMPARATOR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[An InputFormat capable of performing joins over a set of data sources sorted
|
|
and partitioned the same way.
|
|
@see #setFormat
|
|
|
|
A user may define new join types by setting the property
|
|
<tt>mapreduce.join.define.&lt;ident&gt;</tt> to a classname.
|
|
In the expression <tt>mapreduce.join.expr</tt>, the identifier will be
|
|
assumed to be a ComposableRecordReader.
|
|
<tt>mapreduce.join.keycomparator</tt> can be a classname used to compare
|
|
keys in the join.
|
|
@see JoinRecordReader
|
|
@see MultiFilterRecordReader]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.CompositeInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.CompositeInputSplit -->
|
|
<class name="CompositeInputSplit" extends="org.apache.hadoop.mapreduce.InputSplit"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="CompositeInputSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="CompositeInputSplit" type="int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="s" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Add an InputSplit to this collection.
|
|
@throws IOException If capacity was not specified during construction
|
|
or if capacity has been reached.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="get" return="org.apache.hadoop.mapreduce.InputSplit"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<doc>
|
|
<![CDATA[Get the ith child InputSplit.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Return the aggregate length of all child InputSplits currently added.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the length of the ith child InputSplit.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Collect a set of hosts from all child InputSplits.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocation" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the locations of the ith child InputSplit.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Write splits in the following format.
|
|
{@code
|
|
<count><class1><class2>...<classn><split1><split2>...<splitn>
|
|
}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}
|
|
@throws IOException If the child InputSplit cannot be read, typically
|
|
for failing access checks.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This InputSplit contains a set of child InputSplits. Any InputSplit inserted
|
|
into this collection must have a public default constructor.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.CompositeInputSplit -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader -->
|
|
<class name="CompositeRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.conf.Configurable"/>
|
|
<constructor name="CompositeRecordReader" type="int, int, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create a RecordReader with <tt>capacity</tt> children to position
|
|
<tt>id</tt> in the parent reader.
|
|
The id of a root CompositeRecordReader is -1 by convention, but relying
|
|
on this is not recommended.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="combine" return="boolean"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="srcs" type="java.lang.Object[]"/>
|
|
<param name="value" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
|
|
</method>
|
|
<method name="initialize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="id" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the position in the collector this class occupies.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordReaderQueue" return="java.util.PriorityQueue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return sorted list of RecordReaders for this composite.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getComparator" return="org.apache.hadoop.io.WritableComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return comparator defining the ordering for RecordReaders in this
|
|
composite.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="rr" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Add a RecordReader to this collection.
|
|
The id() of a RecordReader determines where in the Tuple its
|
|
entry will appear. Adding RecordReaders with the same id has
|
|
undefined behavior.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the key for the current join or the value at the top of the
|
|
RecordReader heap.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Clone the key at the top of this RR into the given object.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentKey" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return true if it is possible that this could emit more values.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="skip"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Pass skip key to child RRs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDelegate" return="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Obtain an iterator over the child RRs apropos of the value type
|
|
ultimately emitted from this join.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jc" type="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector"/>
|
|
<param name="key" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[If key provided matches that of this Composite, give JoinCollector
|
|
iterator over values it may emit.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="fillJoinCollector"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="iterkey" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[For all child RRs offering the key provided, obtain an iterator
|
|
at that position in the JoinCollector.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"/>
|
|
<doc>
|
|
<![CDATA[Implement Comparable contract (compare key of join or head of heap
|
|
with that of another).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createKey" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a new key common to all child RRs.
|
|
@throws ClassCastException if key classes differ.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createTupleWritable" return="org.apache.hadoop.mapreduce.lib.join.TupleWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a value to be used internally for joins.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentValue" return="X"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close all child RRs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Report progress as the minimum of all child RR progress.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="conf" type="org.apache.hadoop.conf.Configuration"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="keyclass" type="java.lang.Class"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="jc" type="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector"
|
|
transient="false" volatile="false"
|
|
static="false" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="kids" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader[]"
|
|
transient="false" volatile="false"
|
|
static="false" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="key" type="K"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="value" type="X"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A RecordReader that can effect joins of RecordReaders sharing a common key
|
|
type and partitioning.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector -->
|
|
<class name="CompositeRecordReader.JoinCollector" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="CompositeRecordReader.JoinCollector" type="int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a collector capable of handling the specified number of
|
|
children.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="id" type="int"/>
|
|
<param name="i" type="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Register a given iterator at position id.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the key associated with this collection.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<doc>
|
|
<![CDATA[Codify the contents of the collector to be iterated over.
|
|
When this is called, all RecordReaders registered for this
|
|
key should have added ResetableIterators.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Clear all state information.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns false if exhausted or if reset(K) has not been called.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Populate Tuple from iterators.
|
|
It should be the case that, given iterators i_1...i_n over values from
|
|
sources s_1...s_n sharing key k, repeated calls to next should yield
|
|
I x I.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Replay the last Tuple emitted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close all child iterators.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="flush" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="value" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Write the next value into key, value as accepted by the operation
|
|
associated with this set of RecordReaders.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Collector for join values.
|
|
This accumulates values for a given key from the child RecordReaders. If
|
|
one or more child RR contain duplicate keys, this will emit the cross
|
|
product of the associated values until exhausted.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.InnerJoinRecordReader -->
|
|
<class name="InnerJoinRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.JoinRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="combine" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="srcs" type="java.lang.Object[]"/>
|
|
<param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
|
|
<doc>
|
|
<![CDATA[Return true iff the tuple is full (all data sources contain this key).]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Full inner join.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.InnerJoinRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.JoinRecordReader -->
|
|
<class name="JoinRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JoinRecordReader" type="int, org.apache.hadoop.conf.Configuration, int, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Emit the next set of key, value pairs as defined by the child
|
|
RecordReaders and operation associated with this composite RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValue" return="org.apache.hadoop.mapreduce.lib.join.TupleWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getDelegate" return="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return an iterator wrapping the JoinCollector.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Base class for Composite joins returning Tuples of arbitrary Writables.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.JoinRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.JoinRecordReader.JoinDelegationIterator -->
|
|
<class name="JoinRecordReader.JoinDelegationIterator" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/>
|
|
<constructor name="JoinRecordReader.JoinDelegationIterator"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="item" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Since the JoinCollector is effecting our operation, we need only
|
|
provide an iterator proxy wrapping its operation.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.JoinRecordReader.JoinDelegationIterator -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader -->
|
|
<class name="MultiFilterRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultiFilterRecordReader" type="int, org.apache.hadoop.conf.Configuration, int, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="emit" return="V"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For each tuple emitted, return a value (typically one of the values
|
|
in the tuple).
|
|
Modifying the Writables in the tuple is permitted and unlikely to affect
|
|
join behavior in most cases, but it is not recommended. It's safer to
|
|
clone first.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="combine" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="srcs" type="java.lang.Object[]"/>
|
|
<param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
|
|
<doc>
|
|
<![CDATA[Default implementation offers {@link #emit} every Tuple from the
|
|
collector (the outer join of child RRs).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="initialize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getDelegate" return="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return an iterator returning a single value from the tuple.
|
|
@see MultiFilterDelegationIterator]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Base class for Composite join returning values derived from multiple
|
|
sources, but generally not tuples.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader.MultiFilterDelegationIterator -->
|
|
<class name="MultiFilterRecordReader.MultiFilterDelegationIterator" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/>
|
|
<constructor name="MultiFilterRecordReader.MultiFilterDelegationIterator"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="item" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Proxy the JoinCollector, but include callback to emit.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader.MultiFilterDelegationIterator -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.OuterJoinRecordReader -->
|
|
<class name="OuterJoinRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.JoinRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="combine" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="srcs" type="java.lang.Object[]"/>
|
|
<param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
|
|
<doc>
|
|
<![CDATA[Emit everything from the collector.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Full outer join.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.OuterJoinRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.OverrideRecordReader -->
|
|
<class name="OverrideRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="emit" return="V"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/>
|
|
<doc>
|
|
<![CDATA[Emit the value with the highest position in the tuple.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValue" return="V"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="fillJoinCollector"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="iterkey" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Instead of filling the JoinCollector with iterators from all
|
|
data sources, fill only the rightmost for this key.
|
|
This not only saves space by discarding the other sources, but
|
|
it also emits the number of key-value pairs in the preferred
|
|
RecordReader instead of repeating that stream n times, where
|
|
n is the cardinality of the cross product of the discarded
|
|
streams for the given key.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Prefer the "rightmost" data source for this key.
|
|
For example, <tt>override(S1,S2,S3)</tt> will prefer values
|
|
from S3 over S2, and values from S2 over S1 for all keys
|
|
emitted from all sources.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.OverrideRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.Parser -->
|
|
<class name="Parser" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Parser"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[Very simple shift-reduce parser for join expressions.
|
|
|
|
This should be sufficient for the user extension permitted now, but ought to
|
|
be replaced with a parser generator if more complex grammars are supported.
|
|
In particular, this "shift-reduce" parser has no states. Each set
|
|
of formals requires a different internal node type, which is responsible for
|
|
interpreting the list of tokens it receives. This is sufficient for the
|
|
current grammar, but it has several annoying properties that might inhibit
|
|
extension. In particular, parentheses are always function calls; an
|
|
algebraic or filter grammar would not only require a node type, but must
|
|
also work around the internals of this parser.
|
|
|
|
For most other cases, adding classes to the hierarchy- particularly by
|
|
extending JoinRecordReader and MultiFilterRecordReader- is fairly
|
|
straightforward. One need only override the relevant method(s) (usually only
|
|
{@link CompositeRecordReader#combine}) and include a property to map its
|
|
value to an identifier in the parser.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.Parser -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.Node -->
|
|
<class name="Parser.Node" extends="org.apache.hadoop.mapreduce.lib.join.ComposableInputFormat"
|
|
abstract="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Parser.Node" type="java.lang.String"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="addIdentifier"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="ident" type="java.lang.String"/>
|
|
<param name="mcstrSig" type="java.lang.Class[]"/>
|
|
<param name="nodetype" type="java.lang.Class"/>
|
|
<param name="cl" type="java.lang.Class"/>
|
|
<exception name="NoSuchMethodException" type="java.lang.NoSuchMethodException"/>
|
|
<doc>
|
|
<![CDATA[For a given identifier, add a mapping to the nodetype for the parse
|
|
tree and to the ComposableRecordReader to be created, including the
|
|
formals required to invoke the constructor.
|
|
The nodetype and constructor signature should be filled in from the
|
|
child node.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="id" type="int"/>
|
|
</method>
|
|
<method name="setKeyComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="cmpcl" type="java.lang.Class"/>
|
|
</method>
|
|
<field name="rrCstrMap" type="java.util.Map"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="id" type="int"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="ident" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="cmpcl" type="java.lang.Class"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.Node -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.NodeToken -->
|
|
<class name="Parser.NodeToken" extends="org.apache.hadoop.mapreduce.lib.join.Parser.Token"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getNode" return="org.apache.hadoop.mapreduce.lib.join.Parser.Node"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.NodeToken -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.NumToken -->
|
|
<class name="Parser.NumToken" extends="org.apache.hadoop.mapreduce.lib.join.Parser.Token"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Parser.NumToken" type="double"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getNum" return="double"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.NumToken -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.StrToken -->
|
|
<class name="Parser.StrToken" extends="org.apache.hadoop.mapreduce.lib.join.Parser.Token"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Parser.StrToken" type="org.apache.hadoop.mapreduce.lib.join.Parser.TType, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getStr" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.StrToken -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.Token -->
|
|
<class name="Parser.Token" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getType" return="org.apache.hadoop.mapreduce.lib.join.Parser.TType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getNode" return="org.apache.hadoop.mapreduce.lib.join.Parser.Node"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getNum" return="double"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getStr" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Tagged-union type for tokens from the join expression.
|
|
@see Parser.TType]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.Token -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.TType -->
|
|
<class name="Parser.TType" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapreduce.lib.join.Parser.TType[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapreduce.lib.join.Parser.TType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.TType -->
|
|
<!-- start interface org.apache.hadoop.mapreduce.lib.join.ResetableIterator -->
|
|
<interface name="ResetableIterator" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[True if a call to next may return a value. False positives are
|
|
permitted, but false negatives are not.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="T"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Assign next value to actual.
|
|
It is required that elements added to a ResetableIterator be returned in
|
|
the same order after a call to {@link #reset} (FIFO).
|
|
|
|
Note that a call to this may fail for nested joins (i.e. more elements
|
|
available, but none satisfying the constraints of the join)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="T"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Assign last value returned to actual.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Set iterator to return to the start of its range. Must be called after
|
|
calling {@link #add} to avoid a ConcurrentModificationException.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="item" type="T"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add an element to the collection of elements to iterate over.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close datasources and release resources. Calling methods on the iterator
|
|
after calling close has undefined behavior.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Close datasources, but do not release internal resources. Calling this
|
|
method should permit the object to be reused with a different datasource.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This defines an interface to a stateful Iterator that can replay elements
|
|
added to it directly.
|
|
Note that this does not extend {@link java.util.Iterator}.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapreduce.lib.join.ResetableIterator -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.ResetableIterator.EMPTY -->
|
|
<class name="ResetableIterator.EMPTY" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/>
|
|
<constructor name="ResetableIterator.EMPTY"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="U"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="U"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="item" type="U"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.ResetableIterator.EMPTY -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.StreamBackedIterator -->
|
|
<class name="StreamBackedIterator" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/>
|
|
<constructor name="StreamBackedIterator"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="X"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="X"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="item" type="X"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class provides an implementation of ResetableIterator. This
|
|
implementation uses a byte array to store elements added to it.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.StreamBackedIterator -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.TupleWritable -->
|
|
<class name="TupleWritable" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<implements name="java.lang.Iterable"/>
|
|
<constructor name="TupleWritable"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create an empty tuple with no allocated storage for writables.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TupleWritable" type="org.apache.hadoop.io.Writable[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Initialize tuple with storage; unknown whether any of them contain
|
|
"written" values.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="has" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<doc>
|
|
<![CDATA[Return true if tuple has an element at the position provided.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="get" return="org.apache.hadoop.io.Writable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<doc>
|
|
<![CDATA[Get ith Writable from Tuple.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="size" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The number of children in this Tuple.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="iterator" return="java.util.Iterator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return an iterator over the elements in this tuple.
|
|
Note that this doesn't flatten the tuple; one may receive tuples
|
|
from this iterator.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Convert Tuple to String as in the following.
|
|
{@code [<child1>,<child2>,...,<childn>]}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Writes each Writable to <code>out</code>.
|
|
TupleWritable format:
|
|
{@code
|
|
<count><type1><type2>...<typen><obj1><obj2>...<objn>
|
|
}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<field name="written" type="java.util.BitSet"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Writable type storing multiple {@link org.apache.hadoop.io.Writable}s.
|
|
|
|
This is *not* a general-purpose tuple type. In almost all cases, users are
|
|
encouraged to implement their own serializable types, which can perform
|
|
better validation and provide more efficient encodings than this class is
|
|
capable of. TupleWritable relies on the join framework for type safety and
|
|
assumes its instances will rarely be persisted, assumptions not only
|
|
incompatible with, but contrary to the general case.
|
|
|
|
@see org.apache.hadoop.io.Writable]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.TupleWritable -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.join.WrappedRecordReader -->
|
|
<class name="WrappedRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="WrappedRecordReader" type="int"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="initialize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="createKey" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Request new key from proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValue" return="U"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="id" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the key at the head of this RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="qkey" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Clone the key at the head of this RR into the object supplied.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return true if the RR- including the k,v pair stored in this object-
|
|
is exhausted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="skip"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector"/>
|
|
<param name="key" type="K"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Add an iterator to the collector at the position occupied by this
|
|
RecordReader over the values in this stream paired with the key
|
|
provided (i.e. register a stream of values from this source matching K
|
|
with a collector).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Read the next k,v pair into the head of this object; return true iff
|
|
the RR and this are exhausted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentKey" return="K"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get current key]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentValue" return="U"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get current value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Request progress from proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Forward close request to proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"/>
|
|
<doc>
|
|
<![CDATA[Implement Comparable contract (compare key at head of proxied RR
|
|
with that of another).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Return true iff compareTo(other) returns 0.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<field name="empty" type="boolean"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="cmp" type="org.apache.hadoop.io.WritableComparator"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Proxy class for a RecordReader participating in the join framework.
|
|
|
|
This class keeps track of the "head" key-value pair for the
|
|
provided RecordReader and keeps a store of values matching a key when
|
|
this source is participating in a join.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.join.WrappedRecordReader -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.map">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.map.InverseMapper -->
|
|
<class name="InverseMapper" extends="org.apache.hadoop.mapreduce.Mapper"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InverseMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[The inverse function. Input keys and values are swapped.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Mapper} that swaps keys and values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.map.InverseMapper -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper -->
|
|
<class name="MultithreadedMapper" extends="org.apache.hadoop.mapreduce.Mapper"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultithreadedMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getNumberOfThreads" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[The number of threads in the thread pool that will run the map function.
|
|
@param job the job
|
|
@return the number of threads]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setNumberOfThreads"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="threads" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the number of threads in the pool for running maps.
|
|
@param job the job to modify
|
|
@param threads the new number of threads]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapperClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the application's mapper class.
|
|
@param <K1> the map's input key type
|
|
@param <V1> the map's input value type
|
|
@param <K2> the map's output key type
|
|
@param <V2> the map's output value type
|
|
@param job the job
|
|
@return the mapper class to run]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapperClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the application's mapper class.
|
|
@param <K1> the map input key type
|
|
@param <V1> the map input value type
|
|
@param <K2> the map output key type
|
|
@param <V2> the map output value type
|
|
@param job the job to modify
|
|
@param cls the class to use as the mapper]]>
|
|
</doc>
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Run the application's maps using a thread pool.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="NUM_THREADS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="MAP_CLASS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Multithreaded implementation for {@link org.apache.hadoop.mapreduce.Mapper}.
|
|
<p>
|
|
It can be used instead of the default implementation,
|
|
{@link org.apache.hadoop.mapred.MapRunner}, when the Map operation is not CPU
|
|
bound in order to improve throughput.
|
|
<p>
|
|
Mapper implementations using this MapRunnable must be thread-safe.
|
|
<p>
|
|
The Map-Reduce job has to be configured with the mapper to use via
|
|
{@link #setMapperClass(Job, Class)} and
|
|
the number of threads the thread-pool can use with the
|
|
{@link #getNumberOfThreads(JobContext)} method. The default
|
|
value is 10 threads.
|
|
<p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.map.RegexMapper -->
|
|
<class name="RegexMapper" extends="org.apache.hadoop.mapreduce.Mapper"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="RegexMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<field name="PATTERN" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="GROUP" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A {@link Mapper} that extracts text matching a regular expression.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.map.RegexMapper -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper -->
|
|
<class name="TokenCounterMapper" extends="org.apache.hadoop.mapreduce.Mapper"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TokenCounterMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Tokenize the input values and emit each word with a count of 1.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.map.WrappedMapper -->
|
|
<class name="WrappedMapper" extends="org.apache.hadoop.mapreduce.Mapper"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="WrappedMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getMapContext" return="org.apache.hadoop.mapreduce.Mapper.Context"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="mapContext" type="org.apache.hadoop.mapreduce.MapContext"/>
|
|
<doc>
|
|
<![CDATA[Get a wrapped {@link Mapper.Context} for custom implementations.
|
|
@param mapContext <code>MapContext</code> to be wrapped
|
|
@return a wrapped <code>Mapper.Context</code> for custom implementations]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Mapper} which wraps a given one to allow custom
|
|
{@link Mapper.Context} implementations.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.map.WrappedMapper -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.map.WrappedMapper.Context -->
|
|
<class name="WrappedMapper.Context" extends="org.apache.hadoop.mapreduce.Mapper.Context"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="WrappedMapper.Context" type="org.apache.hadoop.mapreduce.MapContext"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getInputSplit" return="org.apache.hadoop.mapreduce.InputSplit"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the input split for this map.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentKey" return="KEYIN"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getCurrentValue" return="VALUEIN"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="counterName" type="java.lang.Enum"/>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="groupName" type="java.lang.String"/>
|
|
<param name="counterName" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="KEYOUT"/>
|
|
<param name="value" type="VALUEOUT"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getStatus" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getTaskAttemptID" return="org.apache.hadoop.mapreduce.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="msg" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getArchiveClassPaths" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getArchiveTimestamps" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getCacheArchives" return="java.net.URI[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getCacheFiles" return="java.net.URI[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getCombinerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
</method>
|
|
<method name="getConfiguration" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getFileClassPaths" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getFileTimestamps" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getGroupingComparator" return="org.apache.hadoop.io.RawComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getInputFormatClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
</method>
|
|
<method name="getJar" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobSetupCleanupNeeded" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getLocalCacheArchives" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getLocalCacheFiles" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getMapOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMapOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMapperClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
</method>
|
|
<method name="getMaxMapAttempts" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMaxReduceAttempts" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getNumReduceTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getOutputFormatClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
</method>
|
|
<method name="getOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getPartitionerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
</method>
|
|
<method name="getReducerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
</method>
|
|
<method name="getSortComparator" return="org.apache.hadoop.io.RawComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getSymlink" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="progress"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getProfileEnabled" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getProfileParams" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="isMap" type="boolean"/>
|
|
</method>
|
|
<method name="getUser" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<field name="mapContext" type="org.apache.hadoop.mapreduce.MapContext"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.map.WrappedMapper.Context -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.output">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter -->
|
|
<class name="FileOutputCommitter" extends="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FileOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.TaskAttemptContext"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create a file output committer
|
|
@param outputPath the job's output path
|
|
@param context the task's context
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="setupJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create the temporary directory that is the root of all of the task
|
|
work directories.
|
|
@param context the job's context]]>
|
|
</doc>
|
|
</method>
|
|
<method name="commitJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Delete the temporary directory, including all of the work directories.
|
|
Create a _SUCCESS file to mark it as successful.
|
|
@param context the job's context]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanupJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="abortJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<param name="state" type="org.apache.hadoop.mapreduce.JobStatus.State"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Delete the temporary directory, including all of the work directories.
|
|
@param context the job's context]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[No task setup required.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="commitTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Move the files from the work directory to the job output directory
|
|
@param context the task context]]>
|
|
</doc>
|
|
</method>
|
|
<method name="abortTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Delete the work directory
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="needsTaskCommit" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Did this task write any files in the work directory?
|
|
@param context the task's context]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getWorkPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the directory that the task should write results into
|
|
@return the work directory
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<field name="TEMP_DIR_NAME" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Temporary directory name]]>
|
|
</doc>
|
|
</field>
|
|
<field name="SUCCEEDED_FILE_NAME" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[An {@link OutputCommitter} that commits files specified
|
|
in job output directory i.e. ${mapreduce.output.fileoutputformat.outputdir}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat -->
|
|
<class name="FileOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FileOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setCompressOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="compress" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the output of the job is compressed.
|
|
@param job the job to modify
|
|
@param compress should the output of the job be compressed?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCompressOutput" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Is the job output compressed?
|
|
@param job the Job to look in
|
|
@return <code>true</code> if the job output should be compressed,
|
|
<code>false</code> otherwise]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputCompressorClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="codecClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs.
|
|
@param job the job to modify
|
|
@param codecClass the {@link CompressionCodec} to be used to
|
|
compress the job outputs]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputCompressorClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<param name="defaultValue" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link CompressionCodec} for compressing the job outputs.
|
|
@param job the {@link Job} to look in
|
|
@param defaultValue the {@link CompressionCodec} to return if not set
|
|
@return the {@link CompressionCodec} to be used to compress the
|
|
job outputs
|
|
@throws IllegalArgumentException if the class was specified, but not found]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="FileAlreadyExistsException" type="org.apache.hadoop.fs.FileAlreadyExistsException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setOutputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="outputDir" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Path} of the output directory for the map-reduce job.
|
|
|
|
@param job The job to modify
|
|
@param outputDir the {@link Path} of the output directory for
|
|
the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Path} to the output directory for the map-reduce job.
|
|
|
|
@return the {@link Path} to the output directory for the map-reduce job.
|
|
@see FileOutputFormat#getWorkOutputPath(TaskInputOutputContext)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskInputOutputContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Path} to the task's temporary output directory
|
|
for the map-reduce job
|
|
|
|
<h4 id="SideEffectFiles">Tasks' Side-Effect Files</h4>
|
|
|
|
<p>Some applications need to create/write-to side-files, which differ from
|
|
the actual job-outputs.</p>
|
|
|
|
<p>In such cases there could be issues with 2 instances of the same TIP
|
|
(running simultaneously e.g. speculative tasks) trying to open/write-to the
|
|
same file (path) on HDFS. Hence the application-writer will have to pick
|
|
unique names per task-attempt (e.g. using the attemptid, say
|
|
<tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p>
|
|
|
|
<p>To get around this the Map-Reduce framework helps the application-writer
|
|
out by maintaining a special
|
|
<tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt>
|
|
sub-directory for each task-attempt on HDFS where the output of the
|
|
task-attempt goes. On successful completion of the task-attempt the files
|
|
in the <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> (only)
|
|
are <i>promoted</i> to <tt>${mapreduce.output.fileoutputformat.outputdir}</tt>. Of course, the
|
|
framework discards the sub-directory of unsuccessful task-attempts. This
|
|
is completely transparent to the application.</p>
|
|
|
|
<p>The application-writer can take advantage of this by creating any
|
|
side-files required in a work directory during execution
|
|
of the task, i.e. via
|
|
{@link #getWorkOutputPath(TaskInputOutputContext)}, and
|
|
the framework will move them out similarly - thus the application-writer doesn't have to pick
|
|
unique paths per task-attempt.</p>
|
|
|
|
<p>The entire discussion holds true for maps of jobs with
|
|
reducer=NONE (i.e. 0 reduces) since output of the map, in that case,
|
|
goes directly to HDFS.</p>
|
|
|
|
@return the {@link Path} to the task's temporary output directory
|
|
for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPathForWorkFile" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskInputOutputContext"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="extension" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Helper function to generate a {@link Path} for a file that is unique for
|
|
the task within the job output directory.
|
|
|
|
<p>The path can be used to create custom files from within the map and
|
|
reduce tasks. The path name will be unique for each task. The path parent
|
|
will be the job output directory.</p>
|
|
|
|
<p>This method uses the {@link #getUniqueFile} method to make the file name
|
|
unique for the task.</p>
|
|
|
|
@param context the context for the task.
|
|
@param name the name for the file.
|
|
@param extension the extension for the file
|
|
@return a unique path across all tasks of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUniqueFile" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="extension" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Generate a unique filename, based on the task id, name, and extension
|
|
@param context the task that is calling this
|
|
@param name the base filename
|
|
@param extension the filename extension
|
|
@return a string like $name-[mrsct]-$id$extension]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDefaultWorkFile" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<param name="extension" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the default path and filename for the output format.
|
|
@param context the task context
|
|
@param extension an extension to add to the filename
|
|
@return a full path $output/_temporary/$taskid/part-[mr]-$id
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the base output name for the output file.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the base output name for output file to be created.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="BASE_OUTPUT_NAME" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="PART" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="COMPRESS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="COMPRESS_CODEC" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="COMPRESS_TYPE" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="OUTDIR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A base class for {@link OutputFormat}s that write to {@link FileSystem}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat -->
|
|
<class name="FilterOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FilterOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="FilterOutputFormat" type="org.apache.hadoop.mapreduce.OutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a FilterOutputFormat based on the underlying output format.
|
|
@param baseOut the underlying OutputFormat]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<field name="baseOut" type="org.apache.hadoop.mapreduce.OutputFormat"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[FilterOutputFormat is a convenience class that wraps OutputFormat.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat.FilterRecordWriter -->
|
|
<class name="FilterOutputFormat.FilterRecordWriter" extends="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FilterOutputFormat.FilterRecordWriter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="FilterOutputFormat.FilterRecordWriter" type="org.apache.hadoop.mapreduce.RecordWriter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<field name="rawWriter" type="org.apache.hadoop.mapreduce.RecordWriter"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[<code>FilterRecordWriter</code> is a convenience wrapper
|
|
class that extends the {@link RecordWriter}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat.FilterRecordWriter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat -->
|
|
<class name="LazyOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="LazyOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setOutputFormatClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the underlying output format for LazyOutputFormat.
|
|
@param job the {@link Job} to modify
|
|
@param theClass the underlying class]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<field name="OUTPUT_FORMAT" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A convenience class that creates output lazily.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat -->
|
|
<class name="MapFileOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MapFileOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getReaders" return="org.apache.hadoop.io.MapFile.Reader[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="dir" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Open the output generated by this format.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getEntry" return="org.apache.hadoop.io.Writable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="readers" type="org.apache.hadoop.io.MapFile.Reader[]"/>
|
|
<param name="partitioner" type="org.apache.hadoop.mapreduce.Partitioner"/>
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get an entry from output generated by this class.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link org.apache.hadoop.mapreduce.OutputFormat} that writes
|
|
{@link MapFile}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.MultipleOutputs -->
|
|
<class name="MultipleOutputs" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultipleOutputs" type="org.apache.hadoop.mapreduce.TaskInputOutputContext"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Creates and initializes multiple outputs support,
|
|
it should be instantiated in the Mapper/Reducer setup method.
|
|
|
|
@param context the TaskInputOutputContext object]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addNamedOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<param name="outputFormatClass" type="java.lang.Class"/>
|
|
<param name="keyClass" type="java.lang.Class"/>
|
|
<param name="valueClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Adds a named output for the job.
|
|
<p/>
|
|
|
|
@param job job to add the named output
|
|
@param namedOutput named output name, it has to be a word, letters
|
|
and numbers only, cannot be the word 'part' as
|
|
that is reserved for the default output.
|
|
@param outputFormatClass OutputFormat class.
|
|
@param keyClass key class
|
|
@param valueClass value class]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setCountersEnabled"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="enabled" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Enables or disables counters for the named outputs.
|
|
|
|
The counters group is the {@link MultipleOutputs} class name.
|
|
The names of the counters are the same as the named outputs. These
|
|
counters count the number of records written to each output name.
|
|
By default these counters are disabled.
|
|
|
|
@param job job to enable counters
|
|
@param enabled indicates if the counters will be enabled or not.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCountersEnabled" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Returns if the counters for the named outputs are enabled or not.
|
|
By default these counters are disabled.
|
|
|
|
@param job the job
|
|
@return TRUE if the counters are enabled, FALSE if they are disabled.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Write key and value to the namedOutput.
|
|
|
|
Output path is a unique file generated for the namedOutput.
|
|
For example, {namedOutput}-(m|r)-{part-number}
|
|
|
|
@param namedOutput the named output name
|
|
@param key the key
|
|
@param value the value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<param name="baseOutputPath" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Write key and value to baseOutputPath using the namedOutput.
|
|
|
|
@param namedOutput the named output name
|
|
@param key the key
|
|
@param value the value
|
|
@param baseOutputPath base-output path to write the record to.
|
|
Note: Framework will generate unique filename for the baseOutputPath]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="KEYOUT"/>
|
|
<param name="value" type="VALUEOUT"/>
|
|
<param name="baseOutputPath" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Write key value to an output file name.
|
|
|
|
Gets the record writer from job's output format.
|
|
Job's output format should be a FileOutputFormat.
|
|
|
|
@param key the key
|
|
@param value the value
|
|
@param baseOutputPath base-output path to write the record to.
|
|
Note: Framework will generate unique filename for the baseOutputPath]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Closes all the opened outputs.
|
|
|
|
This should be called from cleanup method of map/reduce task.
|
|
If overridden, subclasses must invoke <code>super.close()</code> at the
|
|
end of their <code>close()</code>]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The MultipleOutputs class simplifies writing output data
|
|
to multiple outputs
|
|
|
|
<p>
|
|
Case one: writing to additional outputs other than the job default output.
|
|
|
|
Each additional output, or named output, may be configured with its own
|
|
<code>OutputFormat</code>, with its own key class and with its own value
|
|
class.
|
|
|
|
<p>
|
|
Case two: to write data to different files provided by user
|
|
</p>
|
|
|
|
<p>
|
|
MultipleOutputs supports counters, by default they are disabled. The
|
|
counters group is the {@link MultipleOutputs} class name. The names of the
|
|
counters are the same as the output name. These count the number of records
|
|
written to each output name.
|
|
</p>
|
|
|
|
Usage pattern for job submission:
|
|
<pre>
|
|
|
|
Job job = new Job();
|
|
|
|
FileInputFormat.setInputPath(job, inDir);
|
|
FileOutputFormat.setOutputPath(job, outDir);
|
|
|
|
job.setMapperClass(MOMap.class);
|
|
job.setReducerClass(MOReduce.class);
|
|
...
|
|
|
|
// Defines additional single text based output 'text' for the job
|
|
MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class,
|
|
LongWritable.class, Text.class);
|
|
|
|
// Defines additional sequence-file based output 'seq' for the job
|
|
MultipleOutputs.addNamedOutput(job, "seq",
|
|
SequenceFileOutputFormat.class,
|
|
LongWritable.class, Text.class);
|
|
...
|
|
|
|
job.waitForCompletion(true);
|
|
...
|
|
</pre>
|
|
<p>
|
|
Usage in Reducer:
|
|
<pre>
|
|
<K, V> String generateFileName(K k, V v) {
|
|
return k.toString() + "_" + v.toString();
|
|
}
|
|
|
|
public class MOReduce extends
|
|
Reducer<WritableComparable, Writable,WritableComparable, Writable> {
|
|
private MultipleOutputs mos;
|
|
public void setup(Context context) {
|
|
...
|
|
mos = new MultipleOutputs(context);
|
|
}
|
|
|
|
public void reduce(WritableComparable key, Iterable<Writable> values,
|
|
Context context)
|
|
throws IOException {
|
|
...
|
|
mos.write("text", key, new Text("Hello"));
|
|
mos.write("seq", new LongWritable(1), new Text("Bye"), "seq_a");
|
|
mos.write("seq", new LongWritable(2), new Text("Chau"), "seq_b");
|
|
mos.write(key, new Text("value"), generateFileName(key, new Text("value")));
|
|
...
|
|
}
|
|
|
|
public void cleanup(Context context) throws IOException {
|
|
mos.close();
|
|
...
|
|
}
|
|
|
|
}
|
|
</pre>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.MultipleOutputs -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.NullOutputFormat -->
|
|
<class name="NullOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="NullOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Consume all outputs and put them in /dev/null.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.NullOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat -->
|
|
<class name="SequenceFileAsBinaryOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileAsBinaryOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setSequenceFileOutputKeyClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the key class for the {@link SequenceFile}
|
|
<p>This allows the user to specify the key class to be different
|
|
from the actual class ({@link BytesWritable}) used for writing </p>
|
|
|
|
@param job the {@link Job} to modify
|
|
@param theClass the SequenceFile output key class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSequenceFileOutputValueClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the value class for the {@link SequenceFile}
|
|
<p>This allows the user to specify the value class to be different
|
|
from the actual class ({@link BytesWritable}) used for writing </p>
|
|
|
|
@param job the {@link Job} to modify
|
|
@param theClass the SequenceFile output value class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSequenceFileOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the key class for the {@link SequenceFile}
|
|
|
|
@return the key class of the {@link SequenceFile}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSequenceFileOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the value class for the {@link SequenceFile}
|
|
|
|
@return the value class of the {@link SequenceFile}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="KEY_CLASS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="VALUE_CLASS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[An {@link org.apache.hadoop.mapreduce.OutputFormat} that writes keys,
|
|
values to {@link SequenceFile}s in binary(raw) format]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat.WritableValueBytes -->
|
|
<class name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.SequenceFile.ValueBytes"/>
|
|
<constructor name="SequenceFileAsBinaryOutputFormat.WritableValueBytes"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" type="org.apache.hadoop.io.BytesWritable"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="value" type="org.apache.hadoop.io.BytesWritable"/>
|
|
</method>
|
|
<method name="writeUncompressedBytes"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="outStream" type="java.io.DataOutputStream"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="writeCompressedBytes"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="outStream" type="java.io.DataOutputStream"/>
|
|
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getSize" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Inner class used for appendRaw]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat.WritableValueBytes -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat -->
|
|
<class name="SequenceFileOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getSequenceWriter" return="org.apache.hadoop.io.SequenceFile.Writer"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<param name="keyClass" type="java.lang.Class"/>
|
|
<param name="valueClass" type="java.lang.Class"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getOutputCompressionType" return="org.apache.hadoop.io.SequenceFile.CompressionType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link CompressionType} for the output {@link SequenceFile}.
|
|
@param job the {@link Job}
|
|
@return the {@link CompressionType} for the output {@link SequenceFile},
|
|
defaulting to {@link CompressionType#RECORD}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputCompressionType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="style" type="org.apache.hadoop.io.SequenceFile.CompressionType"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link CompressionType} for the output {@link SequenceFile}.
|
|
@param job the {@link Job} to modify
|
|
@param style the {@link CompressionType} for the output
|
|
{@link SequenceFile}]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link OutputFormat} that writes {@link SequenceFile}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat -->
|
|
<class name="TextOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TextOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<field name="SEPERATOR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[An {@link OutputFormat} that writes plain text files.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.LineRecordWriter -->
|
|
<class name="TextOutputFormat.LineRecordWriter" extends="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="out" type="java.io.DataOutputStream"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.LineRecordWriter -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.partition">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner -->
|
|
<class name="BinaryPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.conf.Configurable"/>
|
|
<constructor name="BinaryPartitioner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setOffsets"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="left" type="int"/>
|
|
<param name="right" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the subarray to be used for partitioning to
|
|
<code>bytes[left:(right+1)]</code> in Python syntax.
|
|
|
|
@param conf configuration object
|
|
@param left left Python-style offset
|
|
@param right right Python-style offset]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setLeftOffset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="offset" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the subarray to be used for partitioning to
|
|
<code>bytes[offset:]</code> in Python syntax.
|
|
|
|
@param conf configuration object
|
|
@param offset left Python-style offset]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setRightOffset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="offset" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the subarray to be used for partitioning to
|
|
<code>bytes[:(offset+1)]</code> in Python syntax.
|
|
|
|
@param conf configuration object
|
|
@param offset right Python-style offset]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getPartition" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.BinaryComparable"/>
|
|
<param name="value" type="V"/>
|
|
<param name="numPartitions" type="int"/>
|
|
<doc>
|
|
<![CDATA[Use (the specified slice of the array returned by)
|
|
{@link BinaryComparable#getBytes()} to partition.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="LEFT_OFFSET_PROPERTY_NAME" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="RIGHT_OFFSET_PROPERTY_NAME" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[<p>Partition {@link BinaryComparable} keys using a configurable part of
|
|
the bytes array returned by {@link BinaryComparable#getBytes()}.</p>
|
|
|
|
<p>The subarray to be used for the partitioning can be defined by means
|
|
of the following properties:
|
|
<ul>
|
|
<li>
|
|
<i>mapreduce.partition.binarypartitioner.left.offset</i>:
|
|
left offset in array (0 by default)
|
|
</li>
|
|
<li>
|
|
<i>mapreduce.partition.binarypartitioner.right.offset</i>:
|
|
right offset in array (-1 by default)
|
|
</li>
|
|
</ul>
|
|
Like in Python, both negative and positive offsets are allowed, but
|
|
the meaning is slightly different. In case of an array of length 5,
|
|
for instance, the possible offsets are:
|
|
<pre><code>
|
|
+---+---+---+---+---+
|
|
| B | B | B | B | B |
|
|
+---+---+---+---+---+
|
|
0 1 2 3 4
|
|
-5 -4 -3 -2 -1
|
|
</code></pre>
|
|
The first row of numbers gives the position of the offsets 0...4 in
|
|
the array; the second row gives the corresponding negative offsets.
|
|
Contrary to Python, the specified subarray has byte <code>i</code>
|
|
and <code>j</code> as first and last element, respectively, when
|
|
<code>i</code> and <code>j</code> are the left and right offset.
|
|
|
|
<p>For Hadoop programs written in Java, it is advisable to use one of
|
|
the following static convenience methods for setting the offsets:
|
|
<ul>
|
|
<li>{@link #setOffsets}</li>
|
|
<li>{@link #setLeftOffset}</li>
|
|
<li>{@link #setRightOffset}</li>
|
|
</ul></p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner -->
|
|
<class name="HashPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="HashPartitioner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getPartition" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<param name="numReduceTasks" type="int"/>
|
|
<doc>
|
|
<![CDATA[Use {@link Object#hashCode()} to partition.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Partition keys by their {@link Object#hashCode()}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.partition.InputSampler -->
|
|
<class name="InputSampler" extends="org.apache.hadoop.conf.Configured"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="InputSampler" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="writePartitionFile"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="sampler" type="org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Write a partition file for the given job, using the Sampler provided.
|
|
Queries the sampler for a sample keyset, sorts by the output key
|
|
comparator, selects the keys for each rank, and writes to the destination
|
|
returned from {@link TotalOrderPartitioner#getPartitionFile}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
<doc>
|
|
<![CDATA[Driver for InputSampler from the command line.
|
|
Configures a Job instance and calls {@link #writePartitionFile}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Utility for collecting samples and writing a partition file for
|
|
{@link TotalOrderPartitioner}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.partition.InputSampler -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.partition.InputSampler.IntervalSampler -->
|
|
<class name="InputSampler.IntervalSampler" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler"/>
|
|
<constructor name="InputSampler.IntervalSampler" type="double"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a new IntervalSampler sampling <em>all</em> splits.
|
|
@param freq The frequency with which records will be emitted.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="InputSampler.IntervalSampler" type="double, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a new IntervalSampler.
|
|
@param freq The frequency with which records will be emitted.
|
|
@param maxSplitsSampled The maximum number of splits to examine.
|
|
@see #getSample]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getSample" return="K[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="inf" type="org.apache.hadoop.mapreduce.InputFormat"/>
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[For each split sampled, emit when the ratio of the number of records
|
|
retained to the total record count is less than the specified
|
|
frequency.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Sample from s splits at regular intervals.
|
|
Useful for sorted data.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.partition.InputSampler.IntervalSampler -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.partition.InputSampler.RandomSampler -->
|
|
<class name="InputSampler.RandomSampler" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler"/>
|
|
<constructor name="InputSampler.RandomSampler" type="double, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a new RandomSampler sampling <em>all</em> splits.
|
|
This will read every split at the client, which is very expensive.
|
|
@param freq Probability with which a key will be chosen.
|
|
@param numSamples Total number of samples to obtain from all selected
|
|
splits.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="InputSampler.RandomSampler" type="double, int, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a new RandomSampler.
|
|
@param freq Probability with which a key will be chosen.
|
|
@param numSamples Total number of samples to obtain from all selected
|
|
splits.
|
|
@param maxSplitsSampled The maximum number of splits to examine.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getSample" return="K[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="inf" type="org.apache.hadoop.mapreduce.InputFormat"/>
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Randomize the split order, then take the specified number of keys from
|
|
each split sampled, where each key is selected with the specified
|
|
probability and possibly replaced by a subsequently selected key when
|
|
the quota of keys from that split is satisfied.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Sample from random points in the input.
|
|
General-purpose sampler. Takes numSamples / maxSplitsSampled inputs from
|
|
each split.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.partition.InputSampler.RandomSampler -->
|
|
<!-- start interface org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler -->
|
|
<interface name="InputSampler.Sampler" abstract="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getSample" return="K[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="inf" type="org.apache.hadoop.mapreduce.InputFormat"/>
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[For a given job, collect and return a subset of the keys from the
|
|
input data.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Interface to sample using an
|
|
{@link org.apache.hadoop.mapreduce.InputFormat}.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.partition.InputSampler.SplitSampler -->
|
|
<class name="InputSampler.SplitSampler" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler"/>
|
|
<constructor name="InputSampler.SplitSampler" type="int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a SplitSampler sampling <em>all</em> splits.
|
|
Takes the first numSamples / numSplits records from each split.
|
|
@param numSamples Total number of samples to obtain from all selected
|
|
splits.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="InputSampler.SplitSampler" type="int, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a new SplitSampler.
|
|
@param numSamples Total number of samples to obtain from all selected
|
|
splits.
|
|
@param maxSplitsSampled The maximum number of splits to examine.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getSample" return="K[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="inf" type="org.apache.hadoop.mapreduce.InputFormat"/>
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[From each split sampled, take the first numSamples / numSplits records.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Samples the first n records from s splits.
|
|
Inexpensive way to sample random data.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.partition.InputSampler.SplitSampler -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator -->
|
|
<class name="KeyFieldBasedComparator" extends="org.apache.hadoop.io.WritableComparator"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.conf.Configurable"/>
|
|
<constructor name="KeyFieldBasedComparator"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="compare" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="b1" type="byte[]"/>
|
|
<param name="s1" type="int"/>
|
|
<param name="l1" type="int"/>
|
|
<param name="b2" type="byte[]"/>
|
|
<param name="s2" type="int"/>
|
|
<param name="l2" type="int"/>
|
|
</method>
|
|
<method name="setKeyFieldComparatorOptions"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="keySpec" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link KeyFieldBasedComparator} options used to compare keys.
|
|
|
|
@param keySpec the key specification of the form -k pos1[,pos2], where,
|
|
pos is of the form f[.c][opts], where f is the number
|
|
of the key field to use, and c is the number of the first character from
|
|
the beginning of the field. Fields and character positions are numbered
|
|
starting with 1; a character position of zero in pos2 indicates the
|
|
field's last character. If '.c' is omitted from pos1, it defaults to 1
|
|
(the beginning of the field); if omitted from pos2, it defaults to 0
|
|
(the end of the field). opts are ordering options. The supported options
|
|
are:
|
|
-n, (Sort numerically)
|
|
-r, (Reverse the result of comparison)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getKeyFieldComparatorOption" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link KeyFieldBasedComparator} options]]>
|
|
</doc>
|
|
</method>
|
|
<field name="COMPARATOR_OPTIONS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This comparator implementation provides a subset of the features provided
|
|
by the Unix/GNU Sort. In particular, the supported features are:
|
|
-n, (Sort numerically)
|
|
-r, (Reverse the result of comparison)
|
|
-k pos1[,pos2], where pos is of the form f[.c][opts], where f is the number
|
|
of the field to use, and c is the number of the first character from the
|
|
beginning of the field. Fields and character positions are numbered starting
|
|
with 1; a character position of zero in pos2 indicates the field's last
|
|
character. If '.c' is omitted from pos1, it defaults to 1 (the beginning
|
|
of the field); if omitted from pos2, it defaults to 0 (the end of the
|
|
field). opts are ordering options (any of 'nr' as described above).
|
|
We assume that the fields in the key are separated by
|
|
{@link JobContext#MAP_OUTPUT_KEY_FIELD_SEPERATOR}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedPartitioner -->
|
|
<class name="KeyFieldBasedPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.conf.Configurable"/>
|
|
<constructor name="KeyFieldBasedPartitioner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getPartition" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K2"/>
|
|
<param name="value" type="V2"/>
|
|
<param name="numReduceTasks" type="int"/>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="b" type="byte[]"/>
|
|
<param name="start" type="int"/>
|
|
<param name="end" type="int"/>
|
|
<param name="currentHash" type="int"/>
|
|
</method>
|
|
<method name="getPartition" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="hash" type="int"/>
|
|
<param name="numReduceTasks" type="int"/>
|
|
</method>
|
|
<method name="setKeyFieldPartitionerOptions"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="keySpec" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link KeyFieldBasedPartitioner} options used for
|
|
{@link Partitioner}
|
|
|
|
@param keySpec the key specification of the form -k pos1[,pos2], where,
|
|
pos is of the form f[.c][opts], where f is the number
|
|
of the key field to use, and c is the number of the first character from
|
|
the beginning of the field. Fields and character positions are numbered
|
|
starting with 1; a character position of zero in pos2 indicates the
|
|
field's last character. If '.c' is omitted from pos1, it defaults to 1
|
|
(the beginning of the field); if omitted from pos2, it defaults to 0
|
|
(the end of the field).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getKeyFieldPartitionerOption" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link KeyFieldBasedPartitioner} options]]>
|
|
</doc>
|
|
</method>
|
|
<field name="PARTITIONER_OPTIONS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Defines a way to partition keys based on certain key fields (also see
|
|
{@link KeyFieldBasedComparator}).
|
|
The key specification supported is of the form -k pos1[,pos2], where,
|
|
pos is of the form f[.c][opts], where f is the number
|
|
of the key field to use, and c is the number of the first character from
|
|
the beginning of the field. Fields and character positions are numbered
|
|
starting with 1; a character position of zero in pos2 indicates the
|
|
field's last character. If '.c' is omitted from pos1, it defaults to 1
|
|
(the beginning of the field); if omitted from pos2, it defaults to 0
|
|
(the end of the field).]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedPartitioner -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner -->
|
|
<class name="TotalOrderPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.conf.Configurable"/>
|
|
<constructor name="TotalOrderPartitioner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Read in the partition file and build indexing data structures.
|
|
If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
|
|
<tt>total.order.partitioner.natural.order</tt> is not false, a trie
|
|
of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
|
|
will be built. Otherwise, keys will be located using a binary search of
|
|
the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
|
|
defined for this job. The input file must be sorted with the same
|
|
comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getPartition" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="V"/>
|
|
<param name="numPartitions" type="int"/>
|
|
</method>
|
|
<method name="setPartitionFile"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="p" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Set the path to the SequenceFile storing the sorted partition keyset.
|
|
It must be the case that for <tt>R</tt> reduces, there are <tt>R-1</tt>
|
|
keys in the SequenceFile.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPartitionFile" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Get the path to the SequenceFile storing the sorted partition keyset.
|
|
@see #setPartitionFile(Configuration, Path)]]>
|
|
</doc>
|
|
</method>
|
|
<field name="DEFAULT_PATH" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="PARTITIONER_PATH" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="MAX_TRIE_DEPTH" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="NATURAL_ORDER" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Partitioner effecting a total order by reading split points from
|
|
an externally generated source.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.reduce">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer -->
|
|
<class name="IntSumReducer" extends="org.apache.hadoop.mapreduce.Reducer"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="IntSumReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="Key"/>
|
|
<param name="values" type="java.lang.Iterable"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer -->
|
|
<class name="LongSumReducer" extends="org.apache.hadoop.mapreduce.Reducer"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="LongSumReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="KEY"/>
|
|
<param name="values" type="java.lang.Iterable"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer -->
|
|
<class name="WrappedReducer" extends="org.apache.hadoop.mapreduce.Reducer"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="WrappedReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getReducerContext" return="org.apache.hadoop.mapreduce.Reducer.Context"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="reduceContext" type="org.apache.hadoop.mapreduce.ReduceContext"/>
|
|
<doc>
|
|
<![CDATA[Get a wrapped {@link Reducer.Context} for custom implementations.
|
|
@param reduceContext <code>ReduceContext</code> to be wrapped
|
|
@return a wrapped <code>Reducer.Context</code> for custom implementations]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Reducer} which wraps a given one to allow for custom
|
|
{@link Reducer.Context} implementations.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer.Context -->
|
|
<class name="WrappedReducer.Context" extends="org.apache.hadoop.mapreduce.Reducer.Context"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="WrappedReducer.Context" type="org.apache.hadoop.mapreduce.ReduceContext"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getCurrentKey" return="KEYIN"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getCurrentValue" return="VALUEIN"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="counterName" type="java.lang.Enum"/>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="groupName" type="java.lang.String"/>
|
|
<param name="counterName" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="KEYOUT"/>
|
|
<param name="value" type="VALUEOUT"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getStatus" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getTaskAttemptID" return="org.apache.hadoop.mapreduce.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="msg" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getArchiveClassPaths" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getArchiveTimestamps" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getCacheArchives" return="java.net.URI[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getCacheFiles" return="java.net.URI[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getCombinerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
</method>
|
|
<method name="getConfiguration" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getFileClassPaths" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getFileTimestamps" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getGroupingComparator" return="org.apache.hadoop.io.RawComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getInputFormatClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
</method>
|
|
<method name="getJar" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobSetupCleanupNeeded" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getLocalCacheArchives" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getLocalCacheFiles" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getMapOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMapOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMapperClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
</method>
|
|
<method name="getMaxMapAttempts" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMaxReduceAttempts" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getNumReduceTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getOutputFormatClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
</method>
|
|
<method name="getOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getPartitionerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
</method>
|
|
<method name="getReducerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
</method>
|
|
<method name="getSortComparator" return="org.apache.hadoop.io.RawComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getSymlink" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="progress"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getValues" return="java.lang.Iterable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="nextKey" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getProfileEnabled" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getProfileParams" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="isMap" type="boolean"/>
|
|
</method>
|
|
<method name="getUser" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<field name="reduceContext" type="org.apache.hadoop.mapreduce.ReduceContext"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer.Context -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.security">
|
|
<!-- start class org.apache.hadoop.mapreduce.security.TokenCache -->
|
|
<class name="TokenCache" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TokenCache"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getSecretKey" return="byte[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="alias" type="org.apache.hadoop.io.Text"/>
|
|
<doc>
|
|
<![CDATA[auxiliary method to get user's secret keys.
|
|
@param alias
|
|
@return secret key from the storage]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addSecretKey"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="alias" type="org.apache.hadoop.io.Text"/>
|
|
<param name="key" type="byte[]"/>
|
|
<doc>
|
|
<![CDATA[auxiliary method to store user's secret keys
|
|
@param alias
|
|
@param key]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addDelegationToken"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="namenode" type="java.lang.String"/>
|
|
<param name="t" type="org.apache.hadoop.security.token.Token"/>
|
|
<doc>
|
|
<![CDATA[auxiliary method to add a delegation token]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getAllTokens" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[auxiliary method
|
|
@return all the available tokens]]>
|
|
</doc>
|
|
</method>
|
|
<method name="obtainTokensForNamenodes"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ps" type="org.apache.hadoop.fs.Path[]"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Convenience method to obtain delegation tokens from namenodes
|
|
corresponding to the paths passed.
|
|
@param ps array of paths
|
|
@param conf configuration
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDelegationToken" return="org.apache.hadoop.security.token.Token"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="namenode" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[@param namenode
|
|
@return delegation token]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTokenStorage" return="org.apache.hadoop.security.TokenStorage"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return TokenStorage object]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTokenStorage"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ts" type="org.apache.hadoop.security.TokenStorage"/>
|
|
<doc>
|
|
<![CDATA[sets TokenStorage
|
|
@param ts]]>
|
|
</doc>
|
|
</method>
|
|
<method name="loadTaskTokenStorage" return="org.apache.hadoop.security.TokenStorage"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="fileName" type="java.lang.String"/>
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[loads token storage and stores it
|
|
@param conf
|
|
@return Loaded TokenStorage object
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="loadTokens" return="org.apache.hadoop.security.TokenStorage"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobTokenFile" type="java.lang.String"/>
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[load job token from a file
|
|
@param conf
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobToken"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="t" type="org.apache.hadoop.security.token.Token"/>
|
|
<param name="ts" type="org.apache.hadoop.security.TokenStorage"/>
|
|
<doc>
|
|
<![CDATA[store job token
|
|
@param t]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobToken" return="org.apache.hadoop.security.token.Token"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ts" type="org.apache.hadoop.security.TokenStorage"/>
|
|
<doc>
|
|
<![CDATA[@return job token]]>
|
|
</doc>
|
|
</method>
|
|
<field name="JOB_TOKEN_HDFS_FILE" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[file name used on HDFS for generated job token]]>
|
|
</doc>
|
|
</field>
|
|
<field name="JOB_TOKENS_FILENAME" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[conf setting for job tokens cache file name]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class provides user facing APIs for transferring secrets from
|
|
the job client to the tasks.
|
|
The secrets can be stored just before submission of jobs and read during
|
|
the task execution.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.security.TokenCache -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.server.jobtracker">
|
|
<!-- start class org.apache.hadoop.mapreduce.server.jobtracker.State -->
|
|
<class name="State" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapreduce.server.jobtracker.State[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapreduce.server.jobtracker.State"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Describes the state of JobTracker]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.server.jobtracker.State -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.tools">
|
|
<!-- start class org.apache.hadoop.mapreduce.tools.CLI -->
|
|
<class name="CLI" extends="org.apache.hadoop.conf.Configured"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="CLI"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="CLI" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="argv" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<method name="getCounter" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="counters" type="org.apache.hadoop.mapreduce.Counters"/>
|
|
<param name="counterGroupName" type="java.lang.String"/>
|
|
<param name="counterName" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getTaskLogURL" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
|
|
<param name="baseUrl" type="java.lang.String"/>
|
|
</method>
|
|
<method name="displayTasks"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="type" type="java.lang.String"/>
|
|
<param name="state" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Display the information about a job's tasks, of a particular type and
|
|
in a particular state
|
|
|
|
@param job the job
|
|
@param type the type of the task (map/reduce/setup/cleanup)
|
|
@param state the state of the task
|
|
(pending/running/completed/failed/killed)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="displayJobList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="jobs" type="org.apache.hadoop.mapreduce.Job[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="argv" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Interprets the map reduce cli options]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.tools.CLI -->
|
|
</package>
|
|
<package name="org.apache.hadoop.fs">
|
|
<!-- start class org.apache.hadoop.fs.HarFileSystem -->
|
|
<class name="HarFileSystem" extends="org.apache.hadoop.fs.FilterFileSystem"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="HarFileSystem"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[public construction of harfilesystem]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="HarFileSystem" type="org.apache.hadoop.fs.FileSystem"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructor to create a HarFileSystem with an
|
|
underlying filesystem.
|
|
@param fs]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="initialize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.net.URI"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Initialize a Har filesystem per har archive. The
|
|
archive home directory is the top level directory
|
|
in the filesystem that contains the HAR archive.
|
|
Be careful with this method, you do not want to go
|
|
on creating new Filesystem instances per call to
|
|
path.getFileSystem().
|
|
the uri of Har is
|
|
har://underlyingfsscheme-host:port/archivepath.
|
|
or
|
|
har:///archivepath. This assumes the underlying filesystem
|
|
to be used in case not specified.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getHarVersion" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[return the top level archive.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUri" return="java.net.URI"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the uri of this filesystem.
|
|
The uri is of the form
|
|
har://underlyingfsscheme-host:port/pathintheunderlyingfs]]>
|
|
</doc>
|
|
</method>
|
|
<method name="makeQualified" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
</method>
|
|
<method name="getFileBlockLocations" return="org.apache.hadoop.fs.BlockLocation[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="file" type="org.apache.hadoop.fs.FileStatus"/>
|
|
<param name="start" type="long"/>
|
|
<param name="len" type="long"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[get block locations from the underlying fs
|
|
@param file the input filestatus to get block locations
|
|
@param start the start in the file
|
|
@param len the length in the file
|
|
@return block locations for this segment of file
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getHarHash" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="p" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[the hash of the path p inside
|
|
the filesystem
|
|
@param p the path in the harfilesystem
|
|
@return the hash code of the path.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getFileStatus" return="org.apache.hadoop.fs.FileStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="f" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[return the filestatus of files in har archive.
|
|
The permissions returned are those of the archive
|
|
index files. The permissions are not persisted
|
|
while creating a hadoop archive.
|
|
@param f the path in har filesystem
|
|
@return filestatus.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getFileChecksum" return="org.apache.hadoop.fs.FileChecksum"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="f" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[@return null since no checksum algorithm is implemented.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="open" return="org.apache.hadoop.fs.FSDataInputStream"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="f" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="bufferSize" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Returns a har input stream which fakes end of
|
|
file. It reads the index files to get the part
|
|
file name and the size and start of the file.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="create" return="org.apache.hadoop.fs.FSDataOutputStream"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="f" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="bufferSize" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="create" return="org.apache.hadoop.fs.FSDataOutputStream"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="f" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="permission" type="org.apache.hadoop.fs.permission.FsPermission"/>
|
|
<param name="flag" type="java.util.EnumSet"/>
|
|
<param name="bufferSize" type="int"/>
|
|
<param name="replication" type="short"/>
|
|
<param name="blockSize" type="long"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setReplication" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="src" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="replication" type="short"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Not implemented.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="delete" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="f" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="recursive" type="boolean"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Not implemented.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="f" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[liststatus returns the children of a directory
|
|
after looking up the index files.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getHomeDirectory" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[return the top level archive path.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setWorkingDirectory"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="newDir" type="org.apache.hadoop.fs.Path"/>
|
|
</method>
|
|
<method name="mkdirs" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="f" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="permission" type="org.apache.hadoop.fs.permission.FsPermission"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[not implemented.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="copyFromLocalFile"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="delSrc" type="boolean"/>
|
|
<param name="src" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="dst" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[not implemented.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="copyToLocalFile"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="delSrc" type="boolean"/>
|
|
<param name="src" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="dst" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[copies the file in the har filesystem to a local file.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="startLocalOutput" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="fsOutputFile" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="tmpLocalFile" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[not implemented.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="completeLocalOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="fsOutputFile" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="tmpLocalFile" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[not implemented.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOwner"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="p" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="username" type="java.lang.String"/>
|
|
<param name="groupname" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[not implemented.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setPermission"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="p" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="permisssion" type="org.apache.hadoop.fs.permission.FsPermission"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Not implemented.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="VERSION" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This is an implementation of the Hadoop Archive
|
|
Filesystem. This archive Filesystem has index files
|
|
of the form _index* and has contents of the form
|
|
part-*. The index files store the indexes of the
|
|
real files. The index files are of the form _masterindex
|
|
and _index. The master index is a level of indirection
|
|
into the index file to make lookups faster. The index
|
|
file is sorted with hash code of the paths that it contains
|
|
and the master index contains pointers to the positions in
|
|
index for ranges of hashcodes.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.fs.HarFileSystem -->
|
|
</package>
|
|
<package name="org.apache.hadoop.tools">
|
|
<!-- start class org.apache.hadoop.tools.DistCh -->
|
|
<class name="DistCh" extends="org.apache.hadoop.tools.DistTool"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
<doc>
|
|
<![CDATA[This is the main driver for recursively changing file properties.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A Map-reduce program to recursively change file properties
|
|
such as owner, group and permission.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.DistCh -->
|
|
<!-- start class org.apache.hadoop.tools.DistCp -->
|
|
<class name="DistCp" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="DistCp" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="copy"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="srcPath" type="java.lang.String"/>
|
|
<param name="destPath" type="java.lang.String"/>
|
|
<param name="logPath" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="srcAsList" type="boolean"/>
|
|
<param name="ignoreReadFailures" type="boolean"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[This is the main driver for recursively copying directories
|
|
across file systems. It takes at least two cmdline parameters. A source
|
|
URL and a destination URL. It then essentially does an "ls -lR" on the
|
|
source URL, and writes the output in a round-robin manner to all the map
|
|
input files. The mapper actually copies the files allotted to it. The
|
|
reduce is empty.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<method name="getRandomId" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A Map-reduce program to recursively copy directories between
|
|
different file-systems.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.DistCp -->
|
|
<!-- start class org.apache.hadoop.tools.DistCp.DuplicationException -->
|
|
<class name="DistCp.DuplicationException" extends="java.io.IOException"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<field name="ERROR_CODE" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Error code for this exception]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[An exception class for duplicated source files.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.DistCp.DuplicationException -->
|
|
<!-- start class org.apache.hadoop.tools.HadoopArchives -->
|
|
<class name="HadoopArchives" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="HadoopArchives" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
<doc>
|
|
<![CDATA[the main driver for creating the archives
|
|
it takes at least three command line parameters. The parent path,
|
|
The src and the dest. It does an lsr on the source paths.
|
|
The mapper creates archives and the reducer creates
|
|
the archive index.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[the main functions]]>
|
|
</doc>
|
|
</method>
|
|
<field name="VERSION" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[an archive creation utility.
|
|
This class provides methods that can be used
|
|
to create hadoop archives. For understanding of
|
|
Hadoop archives look at {@link HarFileSystem}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.HadoopArchives -->
|
|
<!-- start class org.apache.hadoop.tools.Logalyzer -->
|
|
<class name="Logalyzer" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Logalyzer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="doArchive"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="logListURI" type="java.lang.String"/>
|
|
<param name="archiveDirectory" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[doArchive: Workhorse function to archive log-files.
|
|
@param logListURI : The uri which will serve list of log-files to archive.
|
|
@param archiveDirectory : The directory to store archived logfiles.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="doAnalyze"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="inputFilesDirectory" type="java.lang.String"/>
|
|
<param name="outputDirectory" type="java.lang.String"/>
|
|
<param name="grepPattern" type="java.lang.String"/>
|
|
<param name="sortColumns" type="java.lang.String"/>
|
|
<param name="columnSeparator" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[doAnalyze:
|
|
@param inputFilesDirectory : Directory containing the files to be analyzed.
|
|
@param outputDirectory : Directory to store analysis (output).
|
|
@param grepPattern : Pattern to *grep* for.
|
|
@param sortColumns : Sort specification for output.
|
|
@param columnSeparator : Column separator.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
</method>
|
|
<field name="SORT_COLUMNS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="COLUMN_SEPARATOR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Logalyzer: A utility tool for archiving and analyzing hadoop logs.
|
|
<p>
|
|
This tool supports archiving and analyzing (sort/grep) of log-files.
|
|
It takes as input
|
|
a) Input uri which will serve uris of the logs to be archived.
|
|
b) Output directory (not mandatory).
|
|
c) Directory on dfs to archive the logs.
|
|
d) The sort/grep patterns for analyzing the files and separator for boundaries.
|
|
Usage:
|
|
Logalyzer -archive -archiveDir <directory to archive logs> -analysis <directory> -logs <log-list uri> -grep <pattern> -sort <col1, col2> -separator <separator>
|
|
<p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.Logalyzer -->
|
|
<!-- start class org.apache.hadoop.tools.Logalyzer.LogComparator -->
|
|
<class name="Logalyzer.LogComparator" extends="org.apache.hadoop.io.Text.Comparator"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.conf.Configurable"/>
|
|
<constructor name="Logalyzer.LogComparator"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="compare" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="b1" type="byte[]"/>
|
|
<param name="s1" type="int"/>
|
|
<param name="l1" type="int"/>
|
|
<param name="b2" type="byte[]"/>
|
|
<param name="s2" type="int"/>
|
|
<param name="l2" type="int"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A WritableComparator optimized for UTF8 keys of the logs.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.Logalyzer.LogComparator -->
|
|
<!-- start class org.apache.hadoop.tools.Logalyzer.LogRegexMapper -->
|
|
<class name="Logalyzer.LogRegexMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<constructor name="Logalyzer.LogRegexMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="K"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Mapper} that extracts text matching a regular expression.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.Logalyzer.LogRegexMapper -->
|
|
</package>
|
|
<package name="org.apache.hadoop.tools.rumen">
|
|
<!-- start class org.apache.hadoop.tools.rumen.AbstractClusterStory -->
|
|
<class name="AbstractClusterStory" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.tools.rumen.ClusterStory"/>
|
|
<constructor name="AbstractClusterStory"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getMachines" return="java.util.Set"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getRacks" return="java.util.Set"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getRandomMachines" return="org.apache.hadoop.tools.rumen.MachineNode[]"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="expected" type="int"/>
|
|
<param name="random" type="java.util.Random"/>
|
|
</method>
|
|
<method name="buildMachineNodeMap"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMachineByName" return="org.apache.hadoop.tools.rumen.MachineNode"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<method name="distance" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="a" type="org.apache.hadoop.tools.rumen.Node"/>
|
|
<param name="b" type="org.apache.hadoop.tools.rumen.Node"/>
|
|
</method>
|
|
<method name="buildRackNodeMap"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getRackByName" return="org.apache.hadoop.tools.rumen.RackNode"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getMaximumDistance" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="parseTopologyTree"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<field name="machineNodes" type="java.util.Set"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="rackNodes" type="java.util.Set"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="mNodesFlattened" type="org.apache.hadoop.tools.rumen.MachineNode[]"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="mNodeMap" type="java.util.Map"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="rNodeMap" type="java.util.Map"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="maximumDistance" type="int"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[{@link AbstractClusterStory} provides a partial implementation of
|
|
{@link ClusterStory} by parsing the topology tree.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.AbstractClusterStory -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.CDFPiecewiseLinearRandomGenerator -->
|
|
<class name="CDFPiecewiseLinearRandomGenerator" extends="org.apache.hadoop.tools.rumen.CDFRandomGenerator"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="CDFPiecewiseLinearRandomGenerator" type="org.apache.hadoop.tools.rumen.LoggedDiscreteCDF"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@param cdf
|
|
builds a CDFRandomValue engine around this
|
|
{@link LoggedDiscreteCDF}, with a default-seeded RNG]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="CDFPiecewiseLinearRandomGenerator" type="org.apache.hadoop.tools.rumen.LoggedDiscreteCDF, long"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@param cdf
|
|
builds a CDFRandomValue engine around this
|
|
{@link LoggedDiscreteCDF}, with an explicitly seeded RNG
|
|
@param seed
|
|
the random number generator seed]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="valueAt" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="probability" type="double"/>
|
|
<doc>
|
|
<![CDATA[TODO This code assumes that the empirical minimum resp. maximum is the
|
|
epistemological minimum resp. maximum. This is probably okay for the
|
|
minimum, because that likely represents a task where everything went well,
|
|
but for the maximum we may want to develop a way of extrapolating past the
|
|
maximum.]]>
|
|
</doc>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.CDFPiecewiseLinearRandomGenerator -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.CDFRandomGenerator -->
|
|
<class name="CDFRandomGenerator" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="initializeTables"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="cdf" type="org.apache.hadoop.tools.rumen.LoggedDiscreteCDF"/>
|
|
</method>
|
|
<method name="floorIndex" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="probe" type="double"/>
|
|
</method>
|
|
<method name="getRankingAt" return="double"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="index" type="int"/>
|
|
</method>
|
|
<method name="getDatumAt" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="index" type="int"/>
|
|
</method>
|
|
<method name="randomValue" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueAt" return="long"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="probability" type="double"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An instance of this class generates random values that conform to the
|
|
embedded {@link LoggedDiscreteCDF} . The discrete CDF is a pointwise
|
|
approximation of the "real" CDF. We therefore have a choice of interpolation
|
|
rules.
|
|
|
|
A concrete subclass of this abstract class will implement valueAt(double)
|
|
using a class-dependent interpolation rule.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.CDFRandomGenerator -->
|
|
<!-- start interface org.apache.hadoop.tools.rumen.ClusterStory -->
|
|
<interface name="ClusterStory" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getMachines" return="java.util.Set"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get all machines of the cluster.
|
|
@return A read-only set that contains all machines of the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRacks" return="java.util.Set"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get all racks of the cluster.
|
|
@return A read-only set that contains all racks of the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getClusterTopology" return="org.apache.hadoop.tools.rumen.Node"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the cluster topology tree.
|
|
@return The root node of the cluster topology tree.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRandomMachines" return="org.apache.hadoop.tools.rumen.MachineNode[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="expected" type="int"/>
|
|
<param name="random" type="java.util.Random"/>
|
|
<doc>
|
|
<![CDATA[Select a random set of machines.
|
|
@param expected The expected sample size.
|
|
@param random Random number generator to use.
|
|
@return An array of up to expected number of {@link MachineNode}s.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMachineByName" return="org.apache.hadoop.tools.rumen.MachineNode"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Get {@link MachineNode} by its host name.
|
|
|
|
@return The {@link MachineNode} with the same name. Or null if not found.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRackByName" return="org.apache.hadoop.tools.rumen.RackNode"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Get {@link RackNode} by its name.
|
|
@return The {@link RackNode} with the same name. Or null if not found.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="distance" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="a" type="org.apache.hadoop.tools.rumen.Node"/>
|
|
<param name="b" type="org.apache.hadoop.tools.rumen.Node"/>
|
|
<doc>
|
|
<![CDATA[Determine the distance between two {@link Node}s. Currently, the distance
|
|
is loosely defined as the length of the longer path for either a or b to
|
|
reach their common ancestor.
|
|
|
|
@param a
|
|
@param b
|
|
@return The distance between {@link Node} a and {@link Node} b.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaximumDistance" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the maximum distance possible between any two nodes.
|
|
@return the maximum distance possible between any two nodes.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link ClusterStory} represents all configurations of a MapReduce cluster,
|
|
including nodes, network topology, and slot configurations.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.tools.rumen.ClusterStory -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.ClusterTopologyReader -->
|
|
<class name="ClusterTopologyReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ClusterTopologyReader" type="org.apache.hadoop.fs.Path, org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Constructor.
|
|
|
|
@param path
|
|
Path to the JSON-encoded topology file, possibly compressed.
|
|
@param conf
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="ClusterTopologyReader" type="java.io.InputStream"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Constructor.
|
|
|
|
@param input
|
|
The input stream for the JSON-encoded topology data.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="get" return="org.apache.hadoop.tools.rumen.LoggedNetworkTopology"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link LoggedNetworkTopology} object.
|
|
|
|
@return The {@link LoggedNetworkTopology} object parsed from the input.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Reads a JSON-encoded cluster topology and produces the parsed
|
|
{@link LoggedNetworkTopology} object.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.ClusterTopologyReader -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.CurrentJHParser -->
|
|
<class name="CurrentJHParser" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.tools.rumen.JobHistoryParser"/>
|
|
<constructor name="CurrentJHParser" type="java.io.InputStream"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="canParse" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="input" type="java.io.InputStream"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Can this parser parse the input?
|
|
|
|
@param input
|
|
@return Whether this parser can parse the input.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="nextEvent" return="org.apache.hadoop.mapreduce.jobhistory.HistoryEvent"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link JobHistoryParser} that parses {@link JobHistory} files produced by
|
|
{@link org.apache.hadoop.mapreduce.jobhistory.JobHistory} in the same source
|
|
code tree as rumen.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.CurrentJHParser -->
|
|
<!-- start interface org.apache.hadoop.tools.rumen.DeepCompare -->
|
|
<interface name="DeepCompare" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="deepCompare"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<param name="myLocation" type="org.apache.hadoop.tools.rumen.TreePath"/>
|
|
<exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/>
|
|
<doc>
|
|
<![CDATA[@param other
|
|
the other comparand that's being compared to me
|
|
@param myLocation
|
|
the path that got to me. In the root, myLocation is null. To
|
|
process the scalar {@code foo} field of the root we will make a
|
|
recursive call with a {@link TreePath} whose {@code fieldName} is
|
|
{@code "foo"} and whose {@code index} is -1 and whose {@code
|
|
parent} is {@code null}. To process the plural {@code bar} field
|
|
of the root we will make a recursive call with a {@link TreePath}
|
|
whose fieldName is {@code "bar"} and whose {@code index} is -1 and
|
|
whose {@code parent} is also {@code null}.
|
|
@throws DeepInequalityException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Classes that implement this interface can deep-compare [for equality only,
|
|
not order] with another instance. They do a deep compare. If there is any
|
|
semantically significant difference, an implementer throws an exception
|
|
with a chain of causes describing the chain of field references and
|
|
indices that get you to the miscompared point.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.tools.rumen.DeepCompare -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.DeepInequalityException -->
|
|
<class name="DeepInequalityException" extends="java.lang.Exception"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="DeepInequalityException" type="java.lang.String, org.apache.hadoop.tools.rumen.TreePath, java.lang.Throwable"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@param message
|
|
an exception message
|
|
@param path
|
|
the path that gets from the root to the inequality
|
|
|
|
This is the constructor that I intend to have used for this
|
|
exception.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="DeepInequalityException" type="java.lang.String, org.apache.hadoop.tools.rumen.TreePath"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@param message
|
|
an exception message
|
|
@param path
|
|
the path that gets from the root to the inequality
|
|
|
|
This is the constructor that I intend to have used for this
|
|
exception.]]>
|
|
</doc>
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[We use this exception class in the unit test, and we do a deep comparison
|
|
when we run the test.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.DeepInequalityException -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.DefaultInputDemuxer -->
|
|
<class name="DefaultInputDemuxer" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.tools.rumen.InputDemuxer"/>
|
|
<constructor name="DefaultInputDemuxer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="bindTo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getNext" return="org.apache.hadoop.tools.rumen.Pair"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link DefaultInputDemuxer} acts as a pass-through demuxer. It just opens
|
|
each file and returns back the input stream. If the input is compressed, it
|
|
would return a decompression stream.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.DefaultInputDemuxer -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.DefaultOutputter -->
|
|
<class name="DefaultOutputter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.tools.rumen.Outputter"/>
|
|
<constructor name="DefaultOutputter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="init"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="output"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="object" type="T"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The default {@link Outputter} that outputs to a plain file. Compression
|
|
will be applied if the path has the right suffix.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.DefaultOutputter -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.DeskewedJobTraceReader -->
|
|
<class name="DeskewedJobTraceReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.io.Closeable"/>
|
|
<constructor name="DeskewedJobTraceReader" type="org.apache.hadoop.tools.rumen.JobTraceReader, int, boolean"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Constructor.
|
|
|
|
@param reader
|
|
the {@link JobTraceReader} that's being protected
|
|
@param skewBufferSize
|
|
the number of late jobs that can precede a later out-of-order
|
|
earlier job
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="DeskewedJobTraceReader" type="org.apache.hadoop.tools.rumen.JobTraceReader"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.DeskewedJobTraceReader -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.Folder -->
|
|
<class name="Folder" extends="org.apache.hadoop.conf.Configured"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="Folder"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[@param args]]>
|
|
</doc>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.Folder -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.Hadoop20JHParser -->
|
|
<class name="Hadoop20JHParser" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.tools.rumen.JobHistoryParser"/>
|
|
<constructor name="Hadoop20JHParser" type="java.io.InputStream"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="canParse" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="input" type="java.io.InputStream"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Can this parser parse the input?
|
|
|
|
@param input
|
|
@return Whether this parser can parse the input.
|
|
@throws IOException
|
|
|
|
We will deem a stream to be a good 0.20 job history stream if the
|
|
first line is exactly "Meta VERSION=\"1\" ."]]>
|
|
</doc>
|
|
</method>
|
|
<method name="nextEvent" return="org.apache.hadoop.mapreduce.jobhistory.HistoryEvent"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link JobHistoryParser} to parse job histories for hadoop 0.20 (META=1).]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.Hadoop20JHParser -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.HadoopLogsAnalyzer -->
|
|
<class name="HadoopLogsAnalyzer" extends="org.apache.hadoop.conf.Configured"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="HadoopLogsAnalyzer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[@param args
|
|
|
|
Last arg is the input file. That file can be a directory, in which
|
|
case you get all the files in sorted order. We will decompress
|
|
files whose names end in .gz .
|
|
|
|
switches: -c collect line types.
|
|
|
|
-d debug mode
|
|
|
|
-delays print out the delays [interval between job submit time and
|
|
launch time]
|
|
|
|
-runtimes print out the job runtimes
|
|
|
|
-spreads print out the ratio of 10%ile and 90%ile, of both the
|
|
successful map task attempt run times and the successful
|
|
reduce task attempt run times
|
|
|
|
-tasktimes prints out individual task time distributions
|
|
|
|
collects all the line types and prints the first example of each
|
|
one]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This is the main class for rumen log mining functionality.
|
|
|
|
It reads a directory of job tracker logs, and computes various information
|
|
about it. See {@code usage()}, below.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.HadoopLogsAnalyzer -->
|
|
<!-- start interface org.apache.hadoop.tools.rumen.InputDemuxer -->
|
|
<interface name="InputDemuxer" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.io.Closeable"/>
|
|
<method name="bindTo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Bind the {@link InputDemuxer} to a particular file.
|
|
|
|
@param path
|
|
The path to the file it should bind to.
|
|
@param conf
|
|
Configuration
|
|
@throws IOException
|
|
|
|
Returns true when the binding succeeds. If the file can be read
|
|
but is in the wrong format, returns false. IOException is
|
|
reserved for read errors.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNext" return="org.apache.hadoop.tools.rumen.Pair"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the next <name, input> pair. The name should preserve the original job
|
|
history file or job conf file name. The input object should be closed
|
|
before calling getNext() again. The old input object would be invalid after
|
|
calling getNext() again.
|
|
|
|
@return the next <name, input> pair.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link InputDemuxer} demultiplexes the input files into individual input
|
|
streams.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.tools.rumen.InputDemuxer -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.Job20LineHistoryEventEmitter -->
|
|
<class name="Job20LineHistoryEventEmitter" extends="org.apache.hadoop.tools.rumen.HistoryEventEmitter"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.Job20LineHistoryEventEmitter -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.JobBuilder -->
|
|
<class name="JobBuilder" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobBuilder" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getJobID" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="process"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="event" type="org.apache.hadoop.mapreduce.jobhistory.HistoryEvent"/>
|
|
<doc>
|
|
<![CDATA[Process one {@link HistoryEvent}
|
|
|
|
@param event
|
|
The {@link HistoryEvent} to be processed.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="process"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="java.util.Properties"/>
|
|
<doc>
|
|
<![CDATA[Process a collection of JobConf {@link Properties}. We do not restrict it
|
|
to be called once. It is okay to process a conf before, during or after the
|
|
events.
|
|
|
|
@param conf
|
|
The job conf properties to be added.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="build" return="org.apache.hadoop.tools.rumen.LoggedJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Request the builder to build the final object. Once called, the
|
|
{@link JobBuilder} would accept no more events or job-conf properties.
|
|
|
|
@return Parsed {@link LoggedJob} object.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link JobBuilder} builds one job. It processes a sequence of
|
|
{@link HistoryEvent}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.JobBuilder -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.JobConfigurationParser -->
|
|
<class name="JobConfigurationParser" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobConfigurationParser" type="java.util.List"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructor
|
|
|
|
@param interested
|
|
properties we should extract from the job configuration xml.]]>
|
|
</doc>
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[{@link JobConfigurationParser} parses the job configuration xml file, and
|
|
extracts various framework specific properties. It parses the file using a
|
|
stream-parser and thus is more memory efficient. [This optimization may be
|
|
postponed for a future release]]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.JobConfigurationParser -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.JobConfPropertyNames -->
|
|
<class name="JobConfPropertyNames" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.tools.rumen.JobConfPropertyNames[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.tools.rumen.JobConfPropertyNames"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getCandidates" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.JobConfPropertyNames -->
|
|
<!-- start interface org.apache.hadoop.tools.rumen.JobHistoryParser -->
|
|
<interface name="JobHistoryParser" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.io.Closeable"/>
|
|
<method name="nextEvent" return="org.apache.hadoop.mapreduce.jobhistory.HistoryEvent"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the next {@link HistoryEvent}
|
|
@return the next {@link HistoryEvent}. If no more events left, return null.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link JobHistoryParser} defines the interface of a Job History file parser.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.tools.rumen.JobHistoryParser -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.JobHistoryParserFactory -->
|
|
<class name="JobHistoryParserFactory" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobHistoryParserFactory"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getParser" return="org.apache.hadoop.tools.rumen.JobHistoryParser"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ris" type="org.apache.hadoop.tools.rumen.RewindableInputStream"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link JobHistoryParserFactory} is a singleton class that attempts to
|
|
determine the version of job history and return a proper parser.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.JobHistoryParserFactory -->
|
|
<!-- start interface org.apache.hadoop.tools.rumen.JobStory -->
|
|
<interface name="JobStory" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getJobConf" return="org.apache.hadoop.mapred.JobConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link JobConf} for the job.
|
|
@return the <code>JobConf</code> for the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the job name.
|
|
@return the job name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the job ID
|
|
@return the job ID]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUser" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user who ran the job.
|
|
@return the user who ran the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSubmissionTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the job submission time.
|
|
@return the job submission time]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumberMaps" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of maps in the {@link JobStory}.
|
|
@return the number of maps in the <code>Job</code>]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumberReduces" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of reduces in the {@link JobStory}.
|
|
@return the number of reduces in the <code>Job</code>]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputSplits" return="org.apache.hadoop.mapreduce.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the input splits for the job.
|
|
@return the input splits for the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskInfo" return="org.apache.hadoop.tools.rumen.TaskInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskType" type="org.apache.hadoop.mapreduce.TaskType"/>
|
|
<param name="taskNumber" type="int"/>
|
|
<doc>
|
|
<![CDATA[Get {@link TaskInfo} for a given task.
|
|
@param taskType {@link TaskType} of the task
|
|
@param taskNumber Partition number of the task
|
|
@return the <code>TaskInfo</code> for the given task]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskAttemptInfo" return="org.apache.hadoop.tools.rumen.TaskAttemptInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskType" type="org.apache.hadoop.mapreduce.TaskType"/>
|
|
<param name="taskNumber" type="int"/>
|
|
<param name="taskAttemptNumber" type="int"/>
|
|
<doc>
|
|
<![CDATA[Get {@link TaskAttemptInfo} for a given task-attempt, without regard to
|
|
impact of locality (e.g. not needed to make scheduling decisions).
|
|
@param taskType {@link TaskType} of the task-attempt
|
|
@param taskNumber Partition number of the task-attempt
|
|
@param taskAttemptNumber Attempt number of the task
|
|
@return the <code>TaskAttemptInfo</code> for the given task-attempt]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapTaskAttemptInfoAdjusted" return="org.apache.hadoop.tools.rumen.TaskAttemptInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskNumber" type="int"/>
|
|
<param name="taskAttemptNumber" type="int"/>
|
|
<param name="locality" type="int"/>
|
|
<doc>
|
|
<![CDATA[Get {@link TaskAttemptInfo} for a given task-attempt, considering impact
|
|
of locality.
|
|
@param taskNumber Partition number of the task-attempt
|
|
@param taskAttemptNumber Attempt number of the task
|
|
@param locality Data locality of the task as scheduled in simulation
|
|
@return the <code>TaskAttemptInfo</code> for the given task-attempt]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutcome" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the outcome of the job execution.
|
|
@return The outcome of the job execution.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getQueueName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the queue where the job is submitted.
|
|
@return the queue where the job is submitted.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link JobStory} represents the runtime information available for a
|
|
completed Map-Reduce job.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.tools.rumen.JobStory -->
|
|
<!-- start interface org.apache.hadoop.tools.rumen.JobStoryProducer -->
|
|
<interface name="JobStoryProducer" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.io.Closeable"/>
|
|
<method name="getNextJob" return="org.apache.hadoop.tools.rumen.JobStory"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the next job.
|
|
@return The next job, or null if no more jobs are available.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link JobStoryProducer} produces the sequence of {@link JobStory}'s.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.tools.rumen.JobStoryProducer -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.JobTraceReader -->
|
|
<class name="JobTraceReader" extends="org.apache.hadoop.tools.rumen.JsonObjectMapperParser"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobTraceReader" type="org.apache.hadoop.fs.Path, org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Constructor.
|
|
|
|
@param path
|
|
Path to the JSON trace file, possibly compressed.
|
|
@param conf
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobTraceReader" type="java.io.InputStream"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Constructor.
|
|
|
|
@param input
|
|
The input stream for the JSON trace.]]>
|
|
</doc>
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[Reads JSON-encoded job traces and produces {@link LoggedJob} instances.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.JobTraceReader -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.JsonObjectMapperWriter -->
|
|
<class name="JsonObjectMapperWriter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.io.Closeable"/>
|
|
<constructor name="JsonObjectMapperWriter" type="java.io.OutputStream, boolean"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="object" type="T"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Simple wrapper around {@link JsonGenerator} to write objects in JSON format.
|
|
@param <T> The type of the objects to be written.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.JsonObjectMapperWriter -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.LoggedDiscreteCDF -->
|
|
<class name="LoggedDiscreteCDF" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<constructor name="LoggedDiscreteCDF"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getMinimum" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getRankings" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMaximum" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getNumberValues" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="deepCompare"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="comparand" type="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<param name="loc" type="org.apache.hadoop.tools.rumen.TreePath"/>
|
|
<exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link LoggedDiscreteCDF} is a discrete approximation of a cumulative
|
|
distribution function, with this class set up to meet the requirements of the
|
|
Jackson JSON parser/generator.
|
|
|
|
All of the public methods are simply accessors for the instance variables we
|
|
want to write out in the JSON files.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.LoggedDiscreteCDF -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.LoggedJob -->
|
|
<class name="LoggedJob" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<method name="setUnknownAttribute"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="attributeName" type="java.lang.String"/>
|
|
<param name="ignored" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="getUser" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobID" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getPriority" return="org.apache.hadoop.tools.rumen.LoggedJob.JobPriority"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getComputonsPerMapInputByte" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getComputonsPerMapOutputByte" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getComputonsPerReduceInputByte" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getComputonsPerReduceOutputByte" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getSubmitTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getLaunchTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getFinishTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getHeapMegabytes" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getTotalMaps" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getTotalReduces" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getOutcome" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobtype" return="org.apache.hadoop.tools.rumen.LoggedJob.JobType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getDirectDependantJobs" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMapTasks" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getReduceTasks" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getOtherTasks" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getSuccessfulMapAttemptCDFs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getFailedMapAttemptCDFs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getSuccessfulReduceAttemptCDF" return="org.apache.hadoop.tools.rumen.LoggedDiscreteCDF"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getFailedReduceAttemptCDF" return="org.apache.hadoop.tools.rumen.LoggedDiscreteCDF"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMapperTriesToSucceed" return="double[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getFailedMapperFraction" return="double"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getRelativeTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getQueue" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getClusterMapMB" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getClusterReduceMB" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobMapMB" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobReduceMB" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="deepCompare"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="comparand" type="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<param name="loc" type="org.apache.hadoop.tools.rumen.TreePath"/>
|
|
<exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link LoggedJob} is a representation of a Hadoop job, with the
|
|
details of this class set up to meet the requirements of the Jackson JSON
|
|
parser/generator.
|
|
|
|
All of the public methods are simply accessors for the instance variables we
|
|
want to write out in the JSON files.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.LoggedJob -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.LoggedJob.JobPriority -->
|
|
<class name="LoggedJob.JobPriority" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.tools.rumen.LoggedJob.JobPriority[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.tools.rumen.LoggedJob.JobPriority"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.LoggedJob.JobPriority -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.LoggedJob.JobType -->
|
|
<class name="LoggedJob.JobType" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.tools.rumen.LoggedJob.JobType[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.tools.rumen.LoggedJob.JobType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.LoggedJob.JobType -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.LoggedLocation -->
|
|
<class name="LoggedLocation" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<constructor name="LoggedLocation"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getLayers" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setUnknownAttribute"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="attributeName" type="java.lang.String"/>
|
|
<param name="ignored" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="deepCompare"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="comparand" type="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<param name="loc" type="org.apache.hadoop.tools.rumen.TreePath"/>
|
|
<exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link LoggedLocation} is a representation of a point in a hierarchical
|
|
network, represented as a series of membership names, broadest first.
|
|
|
|
For example, if your network has <i>hosts</i> grouped into <i>racks</i>, then
|
|
in one cluster you might have a node {@code node1} on rack {@code rack1}. This
|
|
would be represented with an ArrayList of two layers, with two {@link String}
|
|
s being {@code "rack1"} and {@code "node1"}.
|
|
|
|
The details of this class are set up to meet the requirements of the Jackson
|
|
JSON parser/generator.
|
|
|
|
All of the public methods are simply accessors for the instance variables we
|
|
want to write out in the JSON files.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.LoggedLocation -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.LoggedNetworkTopology -->
|
|
<class name="LoggedNetworkTopology" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<constructor name="LoggedNetworkTopology"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setUnknownAttribute"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="attributeName" type="java.lang.String"/>
|
|
<param name="ignored" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="getName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getChildren" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="deepCompare"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="comparand" type="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<param name="loc" type="org.apache.hadoop.tools.rumen.TreePath"/>
|
|
<exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link LoggedNetworkTopology} represents a tree that in turn represents a
|
|
hierarchy of hosts. The current version requires the tree to have all leaves
|
|
at the same level.
|
|
|
|
All of the public methods are simply accessors for the instance variables we
|
|
want to write out in the JSON files.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.LoggedNetworkTopology -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.LoggedSingleRelativeRanking -->
|
|
<class name="LoggedSingleRelativeRanking" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<constructor name="LoggedSingleRelativeRanking"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setUnknownAttribute"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="attributeName" type="java.lang.String"/>
|
|
<param name="ignored" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="getRelativeRanking" return="double"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getDatum" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="deepCompare"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="comparand" type="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<param name="loc" type="org.apache.hadoop.tools.rumen.TreePath"/>
|
|
<exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link LoggedSingleRelativeRanking} represents an X-Y coordinate of a
|
|
single point in a discrete CDF.
|
|
|
|
All of the public methods are simply accessors for the instance variables we
|
|
want to write out in the JSON files.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.LoggedSingleRelativeRanking -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.LoggedTask -->
|
|
<class name="LoggedTask" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<method name="setUnknownAttribute"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="attributeName" type="java.lang.String"/>
|
|
<param name="ignored" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="getInputBytes" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getInputRecords" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getOutputBytes" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getOutputRecords" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getTaskID" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getStartTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getFinishTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getAttempts" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getPreferredLocations" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getTaskStatus" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getTaskType" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="incorporateCounters"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="counters" type="org.apache.hadoop.mapreduce.jobhistory.JhCounters"/>
|
|
</method>
|
|
<method name="deepCompare"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="comparand" type="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<param name="loc" type="org.apache.hadoop.tools.rumen.TreePath"/>
|
|
<exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link LoggedTask} represents a [hadoop] task that is part of a hadoop job.
|
|
It knows about the [possibly empty] sequence of attempts, its I/O footprint,
|
|
and its runtime.
|
|
|
|
All of the public methods are simply accessors for the instance variables we
|
|
want to write out in the JSON files.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.LoggedTask -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.LoggedTaskAttempt -->
|
|
<class name="LoggedTaskAttempt" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<method name="setUnknownAttribute"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="attributeName" type="java.lang.String"/>
|
|
<param name="ignored" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="getShuffleFinished" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getSortFinished" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getAttemptID" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getResult" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getStartTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getFinishTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getHostName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getHdfsBytesRead" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getHdfsBytesWritten" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getFileBytesRead" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getFileBytesWritten" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMapInputRecords" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMapOutputBytes" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMapOutputRecords" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getCombineInputRecords" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getReduceInputGroups" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getReduceInputRecords" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getReduceShuffleBytes" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getReduceOutputRecords" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getSpilledRecords" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getLocation" return="org.apache.hadoop.tools.rumen.LoggedLocation"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMapInputBytes" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="incorporateCounters"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="counters" type="org.apache.hadoop.mapreduce.jobhistory.JhCounters"/>
|
|
</method>
|
|
<method name="deepCompare"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="comparand" type="org.apache.hadoop.tools.rumen.DeepCompare"/>
|
|
<param name="loc" type="org.apache.hadoop.tools.rumen.TreePath"/>
|
|
<exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link LoggedTaskAttempt} represents an attempt to run a hadoop task in a
|
|
hadoop job. Note that a task can have several attempts.
|
|
|
|
All of the public methods are simply accessors for the instance variables we
|
|
want to write out in the JSON files.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.LoggedTaskAttempt -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.MachineNode -->
|
|
<class name="MachineNode" extends="org.apache.hadoop.tools.rumen.Node"
|
|
abstract="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="obj" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMemory" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the available physical RAM of the node.
|
|
@return The available physical RAM of the node, in KB.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapSlots" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of map slots of the node.
|
|
@return The number of map slots of the node.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReduceSlots" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of reduce slots of the node.
|
|
@return The number of reduce slots of the node.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMemoryPerMapSlot" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the amount of RAM reserved for each map slot.
|
|
@return the amount of RAM reserved for each map slot, in KB.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMemoryPerReduceSlot" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the amount of RAM reserved for each reduce slot.
|
|
@return the amount of RAM reserved for each reduce slot, in KB.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumCores" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of cores of the node.
|
|
@return the number of cores of the node.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRackNode" return="org.apache.hadoop.tools.rumen.RackNode"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the rack node that the machine belongs to.
|
|
|
|
@return The rack node that the machine belongs to. Returns null if the
|
|
machine does not belong to any rack.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addChild" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="child" type="org.apache.hadoop.tools.rumen.Node"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link MachineNode} represents the configuration of a cluster node.
|
|
{@link MachineNode} should be constructed by {@link MachineNode.Builder}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.MachineNode -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.MachineNode.Builder -->
|
|
<class name="MachineNode.Builder" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MachineNode.Builder" type="java.lang.String, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Start building a new {@link MachineNode} object.
|
|
@param name
|
|
Unique name of the node. Typically the fully qualified domain
|
|
name.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="setMemory" return="org.apache.hadoop.tools.rumen.MachineNode.Builder"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="memory" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the physical memory of the node.
|
|
@param memory Available RAM in KB.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapSlots" return="org.apache.hadoop.tools.rumen.MachineNode.Builder"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="mapSlots" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the number of map slots for the node.
|
|
@param mapSlots The number of map slots for the node.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setReduceSlots" return="org.apache.hadoop.tools.rumen.MachineNode.Builder"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="reduceSlots" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the number of reduce slots for the node.
|
|
@param reduceSlots The number of reduce slots for the node.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMemoryPerMapSlot" return="org.apache.hadoop.tools.rumen.MachineNode.Builder"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="memoryPerMapSlot" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the amount of RAM reserved for each map slot.
|
|
@param memoryPerMapSlot The amount of RAM reserved for each map slot, in KB.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMemoryPerReduceSlot" return="org.apache.hadoop.tools.rumen.MachineNode.Builder"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="memoryPerReduceSlot" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the amount of RAM reserved for each reduce slot.
|
|
@param memoryPerReduceSlot The amount of RAM reserved for each reduce slot, in KB.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setNumCores" return="org.apache.hadoop.tools.rumen.MachineNode.Builder"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="numCores" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the number of cores for the node.
|
|
@param numCores Number of cores for the node.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cloneFrom" return="org.apache.hadoop.tools.rumen.MachineNode.Builder"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ref" type="org.apache.hadoop.tools.rumen.MachineNode"/>
|
|
<doc>
|
|
<![CDATA[Clone the settings from a reference {@link MachineNode} object.
|
|
@param ref The reference {@link MachineNode} object.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="build" return="org.apache.hadoop.tools.rumen.MachineNode"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Build the {@link MachineNode} object.
|
|
@return The {@link MachineNode} object being built.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Builder for a {@link MachineNode} object]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.MachineNode.Builder -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.MapAttempt20LineHistoryEventEmitter -->
|
|
<class name="MapAttempt20LineHistoryEventEmitter" extends="org.apache.hadoop.tools.rumen.TaskAttempt20LineEventEmitter"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MapAttempt20LineHistoryEventEmitter"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.MapAttempt20LineHistoryEventEmitter -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.MapTaskAttemptInfo -->
|
|
<class name="MapTaskAttemptInfo" extends="org.apache.hadoop.tools.rumen.TaskAttemptInfo"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MapTaskAttemptInfo" type="org.apache.hadoop.mapred.TaskStatus.State, org.apache.hadoop.tools.rumen.TaskInfo, long"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRuntime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getMapRuntime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the runtime for the <b>map</b> phase of the map-task attempt.
|
|
|
|
@return the runtime for the <b>map</b> phase of the map-task attempt]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link MapTaskAttemptInfo} represents the information with regard to a
|
|
map task attempt.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.MapTaskAttemptInfo -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.Node -->
|
|
<class name="Node" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.lang.Comparable"/>
|
|
<constructor name="Node" type="java.lang.String, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@param name
|
|
A unique name to identify a node in the cluster.
|
|
@param level
|
|
The level of the node in the cluster]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the name of the node.
|
|
|
|
@return The name of the node.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLevel" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the level of the node.
|
|
@return The level of the node.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addChild" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="child" type="org.apache.hadoop.tools.rumen.Node"/>
|
|
<doc>
|
|
<![CDATA[Add a child node to this node.
|
|
@param child The child node to be added. The child node must not currently belong to another cluster topology.
|
|
@return Boolean indicating whether the node is successfully added.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hasChildren" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Does this node have any children?
|
|
@return Boolean indicating whether this node has any children.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getChildren" return="java.util.Set"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the children of this node.
|
|
|
|
@return The children of this node. If no child, an empty set will be
|
|
returned. The returned set is read-only.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getParent" return="org.apache.hadoop.tools.rumen.Node"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the parent node.
|
|
@return the parent node. If root node, return null.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="obj" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="org.apache.hadoop.tools.rumen.Node"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link Node} represents a node in the cluster topology. A node can be a
|
|
{@link MachineNode}, or a {@link RackNode}, etc.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.Node -->
|
|
<!-- start interface org.apache.hadoop.tools.rumen.Outputter -->
|
|
<interface name="Outputter" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.io.Closeable"/>
|
|
<method name="init"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Initialize the {@link Outputter} to a specific path.
|
|
@param path The {@link Path} to the output file.
|
|
@param conf Configuration
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="output"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="object" type="T"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Output an object.
|
|
@param object The object to output.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Interface to output a sequence of objects of type T.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.tools.rumen.Outputter -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants -->
|
|
<class name="Pre21JobHistoryConstants" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Pre21JobHistoryConstants"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values -->
|
|
<class name="Pre21JobHistoryConstants.Values" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This enum contains some of the values commonly used by history log events.
|
|
Since values in history can only be strings, Values.name() is used in
|
|
most places in history file.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.RackNode -->
|
|
<class name="RackNode" extends="org.apache.hadoop.tools.rumen.Node"
|
|
abstract="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="RackNode" type="java.lang.String, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="addChild" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="child" type="org.apache.hadoop.tools.rumen.Node"/>
|
|
</method>
|
|
<method name="getMachinesInRack" return="java.util.Set"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the machine nodes that belong to the rack.
|
|
@return The machine nodes that belong to the rack.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link RackNode} represents a rack node in the cluster topology.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.RackNode -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.RandomSeedGenerator -->
|
|
<class name="RandomSeedGenerator" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="RandomSeedGenerator"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getSeed" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="streamId" type="java.lang.String"/>
|
|
<param name="masterSeed" type="long"/>
|
|
<doc>
|
|
<![CDATA[Generates a new random seed.
|
|
|
|
@param streamId a string identifying the stream of random numbers
|
|
@param masterSeed higher level master random seed
|
|
@return the random seed. Different (streamId, masterSeed) pairs result in
|
|
(vastly) different random seeds.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The purpose of this class is to generate new random seeds from a master
|
|
seed. This is needed to make the Random().next*() calls in rumen and mumak
|
|
deterministic so that mumak simulations become deterministically replayable.
|
|
|
|
In these tools we need many independent streams of random numbers, some of
|
|
which are created dynamically. We seed these streams with the sub-seeds
|
|
returned by RandomSeedGenerator.
|
|
|
|
For a slightly more complicated approach to generating multiple streams of
|
|
random numbers with better theoretical guarantees, see
|
|
P. L'Ecuyer, R. Simard, E. J. Chen, and W. D. Kelton,
|
|
``An Object-Oriented Random-Number Package with Many Long Streams and
|
|
Substreams'', Operations Research, 50, 6 (2002), 1073--1075
|
|
http://www.iro.umontreal.ca/~lecuyer/papers.html
|
|
http://www.iro.umontreal.ca/~lecuyer/myftp/streams00/]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.RandomSeedGenerator -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.ReduceAttempt20LineHistoryEventEmitter -->
|
|
<class name="ReduceAttempt20LineHistoryEventEmitter" extends="org.apache.hadoop.tools.rumen.TaskAttempt20LineEventEmitter"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.ReduceAttempt20LineHistoryEventEmitter -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.ReduceTaskAttemptInfo -->
|
|
<class name="ReduceTaskAttemptInfo" extends="org.apache.hadoop.tools.rumen.TaskAttemptInfo"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ReduceTaskAttemptInfo" type="org.apache.hadoop.mapred.TaskStatus.State, org.apache.hadoop.tools.rumen.TaskInfo, long, long, long"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getReduceRuntime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the runtime for the <b>reduce</b> phase of the reduce task-attempt.
|
|
|
|
@return the runtime for the <b>reduce</b> phase of the reduce task-attempt]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getShuffleRuntime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the runtime for the <b>shuffle</b> phase of the reduce task-attempt.
|
|
|
|
@return the runtime for the <b>shuffle</b> phase of the reduce task-attempt]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMergeRuntime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the runtime for the <b>merge</b> phase of the reduce task-attempt
|
|
|
|
@return the runtime for the <b>merge</b> phase of the reduce task-attempt]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRuntime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link ReduceTaskAttemptInfo} represents the information with regard to a
|
|
reduce task attempt.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.ReduceTaskAttemptInfo -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.RewindableInputStream -->
|
|
<class name="RewindableInputStream" extends="java.io.InputStream"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="RewindableInputStream" type="java.io.InputStream"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructor.
|
|
|
|
@param input]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="RewindableInputStream" type="java.io.InputStream, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructor
|
|
|
|
@param input
|
|
input stream.
|
|
@param maxBytesToRemember
|
|
Maximum number of bytes we need to remember at the beginning of
|
|
the stream. If {@link #rewind()} is called after so many bytes are
|
|
read from the stream, {@link #rewind()} would fail.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="read" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="read" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="buffer" type="byte[]"/>
|
|
<param name="offset" type="int"/>
|
|
<param name="length" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="rewind" return="java.io.InputStream"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A simple wrapper class to make any input stream "rewindable". It could be
|
|
made more memory efficient by growing the internal buffer adaptively.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.RewindableInputStream -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.Task20LineHistoryEventEmitter -->
|
|
<class name="Task20LineHistoryEventEmitter" extends="org.apache.hadoop.tools.rumen.HistoryEventEmitter"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Task20LineHistoryEventEmitter"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.Task20LineHistoryEventEmitter -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.TaskAttempt20LineEventEmitter -->
|
|
<class name="TaskAttempt20LineEventEmitter" extends="org.apache.hadoop.tools.rumen.HistoryEventEmitter"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskAttempt20LineEventEmitter"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.TaskAttempt20LineEventEmitter -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.TaskAttemptInfo -->
|
|
<class name="TaskAttemptInfo" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskAttemptInfo" type="org.apache.hadoop.mapred.TaskStatus.State, org.apache.hadoop.tools.rumen.TaskInfo"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRunState" return="org.apache.hadoop.mapred.TaskStatus.State"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the final {@link TaskStatus.State} of the task-attempt.
|
|
|
|
@return the final <code>State</code> of the task-attempt]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRuntime" return="long"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the total runtime for the task-attempt.
|
|
|
|
@return the total runtime for the task-attempt]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskInfo" return="org.apache.hadoop.tools.rumen.TaskInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link TaskInfo} for the given task-attempt.
|
|
|
|
@return the <code>TaskInfo</code> for the given task-attempt]]>
|
|
</doc>
|
|
</method>
|
|
<field name="state" type="org.apache.hadoop.mapred.TaskStatus.State"
|
|
transient="false" volatile="false"
|
|
static="false" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="taskInfo" type="org.apache.hadoop.tools.rumen.TaskInfo"
|
|
transient="false" volatile="false"
|
|
static="false" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[{@link TaskAttemptInfo} is a collection of statistics about a particular
|
|
task-attempt gleaned from job-history of the job.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.TaskAttemptInfo -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.TaskInfo -->
|
|
<class name="TaskInfo" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskInfo" type="long, int, long, int, long"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getInputBytes" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Raw bytes read from the FileSystem into the task. Note that this
|
|
may not always match the input bytes to the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputRecords" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Number of records input to this task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputBytes" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Raw bytes written to the destination FileSystem. Note that this may
|
|
not match output bytes.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputRecords" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Number of records output from this task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskMemory" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Memory used by the task, which is less than or equal to the heap size.]]>
|
|
</doc>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.TaskInfo -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.TopologyBuilder -->
|
|
<class name="TopologyBuilder" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TopologyBuilder"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="process"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="event" type="org.apache.hadoop.mapreduce.jobhistory.HistoryEvent"/>
|
|
<doc>
|
|
<![CDATA[Process one {@link HistoryEvent}
|
|
|
|
@param event
|
|
The {@link HistoryEvent} to be processed.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="process"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="java.util.Properties"/>
|
|
<doc>
|
|
<![CDATA[Process a collection of JobConf {@link Properties}. We do not restrict it
|
|
to be called once.
|
|
|
|
@param conf
|
|
The job conf properties to be added.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="build" return="org.apache.hadoop.tools.rumen.LoggedNetworkTopology"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Request the builder to build the final object. Once called, the
|
|
{@link TopologyBuilder} would accept no more events or job-conf properties.
|
|
|
|
@return Parsed {@link LoggedNetworkTopology} object.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Building the cluster topology.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.TopologyBuilder -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.TraceBuilder -->
|
|
<class name="TraceBuilder" extends="org.apache.hadoop.conf.Configured"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="TraceBuilder"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
</method>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The main driver of the Rumen Parser.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.TraceBuilder -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.TreePath -->
|
|
<class name="TreePath" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TreePath" type="org.apache.hadoop.tools.rumen.TreePath, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="TreePath" type="org.apache.hadoop.tools.rumen.TreePath, java.lang.String, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This describes a path from a node to the root. We use it when we compare two
|
|
trees during rumen unit tests. If the trees are not identical, this chain
|
|
will be converted to a string which describes the path from the root to the
|
|
fields that did not compare.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.TreePath -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.ZombieCluster -->
|
|
<class name="ZombieCluster" extends="org.apache.hadoop.tools.rumen.AbstractClusterStory"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ZombieCluster" type="org.apache.hadoop.tools.rumen.LoggedNetworkTopology, org.apache.hadoop.tools.rumen.MachineNode"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a homogeneous cluster. We assume that the leaves on the topology
|
|
are {@link MachineNode}s, and the parents of {@link MachineNode}s are
|
|
{@link RackNode}s. We also expect all leaf nodes are on the same level.
|
|
|
|
@param topology
|
|
The network topology.
|
|
@param defaultNode
|
|
The default node setting.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="ZombieCluster" type="org.apache.hadoop.fs.Path, org.apache.hadoop.tools.rumen.MachineNode, org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Construct a homogeneous cluster. We assume that the leaves on the topology
|
|
are {@link MachineNode}s, and the parents of {@link MachineNode}s are
|
|
{@link RackNode}s. We also expect all leaf nodes are on the same level.
|
|
|
|
@param path Path to the JSON-encoded topology file.
|
|
@param conf
|
|
@param defaultNode
|
|
The default node setting.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="ZombieCluster" type="java.io.InputStream, org.apache.hadoop.tools.rumen.MachineNode"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Construct a homogeneous cluster. We assume that the leaves on the topology
|
|
are {@link MachineNode}s, and the parents of {@link MachineNode}s are
|
|
{@link RackNode}s. We also expect all leaf nodes are on the same level.
|
|
|
|
@param input The input stream for the JSON-encoded topology file.
|
|
@param defaultNode
|
|
The default node setting.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getClusterTopology" return="org.apache.hadoop.tools.rumen.Node"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link ZombieCluster} rebuilds the cluster topology using the information
|
|
obtained from job history logs.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.ZombieCluster -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.ZombieJob -->
|
|
<class name="ZombieJob" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.tools.rumen.JobStory"/>
|
|
<constructor name="ZombieJob" type="org.apache.hadoop.tools.rumen.LoggedJob, org.apache.hadoop.tools.rumen.ClusterStory, long"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[This constructor creates a {@link ZombieJob} with the same semantics as the
|
|
{@link LoggedJob} passed in this parameter
|
|
|
|
@param job
|
|
The dead job this ZombieJob instance is based on.
|
|
@param cluster
|
|
The cluster topology that the dead job ran on. This argument can
|
|
be null if we do not have knowledge of the cluster topology.
|
|
@param seed
|
|
Seed for the random number generator for filling in information
|
|
not available from the ZombieJob.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="ZombieJob" type="org.apache.hadoop.tools.rumen.LoggedJob, org.apache.hadoop.tools.rumen.ClusterStory"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[This constructor creates a {@link ZombieJob} with the same semantics as the
|
|
{@link LoggedJob} passed in this parameter
|
|
|
|
@param job
|
|
The dead job this ZombieJob instance is based on.
|
|
@param cluster
|
|
The cluster topology that the dead job ran on. This argument can
|
|
be null if we do not have knowledge of the cluster topology.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getJobConf" return="org.apache.hadoop.mapred.JobConf"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getInputSplits" return="org.apache.hadoop.mapreduce.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getNumberMaps" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getNumberReduces" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getOutcome" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getSubmissionTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getQueueName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getNumLoggedMaps" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Getting the number of map tasks that are actually logged in the trace.
|
|
@return The number of map tasks that are actually logged in the trace.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumLoggedReduces" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Getting the number of reduce tasks that are actually logged in the trace.
|
|
@return The number of reduce tasks that are actually logged in the trace.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUser" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getLoggedJob" return="org.apache.hadoop.tools.rumen.LoggedJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the underlying {@link LoggedJob} object read directly from the trace.
|
|
This is mainly for debugging.
|
|
|
|
@return the underlying {@link LoggedJob} object]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskAttemptInfo" return="org.apache.hadoop.tools.rumen.TaskAttemptInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskType" type="org.apache.hadoop.mapreduce.TaskType"/>
|
|
<param name="taskNumber" type="int"/>
|
|
<param name="taskAttemptNumber" type="int"/>
|
|
<doc>
|
|
<![CDATA[Get a {@link TaskAttemptInfo} with a {@link TaskAttemptID} associated with
|
|
taskType, taskNumber, and taskAttemptNumber. This function does not care
|
|
about locality, and follows the following decision logic: 1. Make up a
|
|
{@link TaskAttemptInfo} if the task attempt is missing in trace, 2. Make up
|
|
a {@link TaskAttemptInfo} if the task attempt has a KILLED final status in
|
|
trace, 3. Otherwise (final state is SUCCEEDED or FAILED), construct the
|
|
{@link TaskAttemptInfo} from the trace.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskInfo" return="org.apache.hadoop.tools.rumen.TaskInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskType" type="org.apache.hadoop.mapreduce.TaskType"/>
|
|
<param name="taskNumber" type="int"/>
|
|
</method>
|
|
<method name="getMapTaskAttemptInfoAdjusted" return="org.apache.hadoop.tools.rumen.TaskAttemptInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskNumber" type="int"/>
|
|
<param name="taskAttemptNumber" type="int"/>
|
|
<param name="locality" type="int"/>
|
|
<doc>
|
|
<![CDATA[Get a {@link TaskAttemptInfo} with a {@link TaskAttemptID} associated with
|
|
the map task's taskNumber and taskAttemptNumber. This function considers
|
|
locality, and follows the following decision logic: 1. Make up a
|
|
{@link TaskAttemptInfo} if the task attempt is missing in trace, 2. Make up
|
|
a {@link TaskAttemptInfo} if the task attempt has a KILLED final status in
|
|
trace, 3. If final state is FAILED, construct a {@link TaskAttemptInfo}
|
|
from the trace, without considering locality. 4. If final state is
|
|
SUCCEEDED, construct a {@link TaskAttemptInfo} from the trace, with runtime
|
|
scaled according to locality in simulation and locality in trace.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link ZombieJob} is a layer above {@link LoggedJob} raw JSON objects.
|
|
|
|
Each {@link ZombieJob} object represents a job in job history. For everything
|
|
that exists in job history, contents are returned unchanged faithfully. To
|
|
get input splits of a non-existent task, a non-existent task attempt, or an
|
|
ill-formed task attempt, proper objects are made up from statistical
|
|
sketches.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.ZombieJob -->
|
|
<!-- start class org.apache.hadoop.tools.rumen.ZombieJobProducer -->
|
|
<class name="ZombieJobProducer" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.tools.rumen.JobStoryProducer"/>
|
|
<constructor name="ZombieJobProducer" type="org.apache.hadoop.fs.Path, org.apache.hadoop.tools.rumen.ZombieCluster, org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Constructor
|
|
|
|
@param path
|
|
Path to the JSON trace file, possibly compressed.
|
|
@param cluster
|
|
The topology of the cluster that corresponds to the jobs in the
|
|
trace. The argument can be null if we do not have knowledge of the
|
|
cluster topology.
|
|
@param conf
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="ZombieJobProducer" type="org.apache.hadoop.fs.Path, org.apache.hadoop.tools.rumen.ZombieCluster, org.apache.hadoop.conf.Configuration, long"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Constructor
|
|
|
|
@param path
|
|
Path to the JSON trace file, possibly compressed.
|
|
@param cluster
|
|
The topology of the cluster that corresponds to the jobs in the
|
|
trace. The argument can be null if we do not have knowledge of the
|
|
cluster topology.
|
|
@param conf
|
|
@param randomSeed
|
|
use a deterministic seed.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="ZombieJobProducer" type="java.io.InputStream, org.apache.hadoop.tools.rumen.ZombieCluster"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Constructor
|
|
|
|
@param input
|
|
The input stream for the JSON trace.
|
|
@param cluster
|
|
The topology of the cluster that corresponds to the jobs in the
|
|
trace. The argument can be null if we do not have knowledge of the
|
|
cluster topology.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="ZombieJobProducer" type="java.io.InputStream, org.apache.hadoop.tools.rumen.ZombieCluster, long"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Constructor
|
|
|
|
@param input
|
|
The input stream for the JSON trace.
|
|
@param cluster
|
|
The topology of the cluster that corresponds to the jobs in the
|
|
trace. The argument can be null if we do not have knowledge of the
|
|
cluster topology.
|
|
@param randomSeed
|
|
use a deterministic seed.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getNextJob" return="org.apache.hadoop.tools.rumen.ZombieJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Producing {@link JobStory}s from job trace.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.rumen.ZombieJobProducer -->
|
|
</package>
|
|
|
|
</api>
|