a196766ea0
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1134994 13f79535-47bb-0310-9956-ffa450edef68
21033 lines
874 KiB
XML
21033 lines
874 KiB
XML
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
|
|
<!-- Generated by the JDiff Javadoc doclet -->
|
|
<!-- (http://www.jdiff.org) -->
|
|
<!-- on Sun May 31 20:42:50 PDT 2009 -->
|
|
|
|
<api
|
|
xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'
|
|
xsi:noNamespaceSchemaLocation='api.xsd'
|
|
name="hadoop-mapred 0.20.0"
|
|
jdversion="1.0.9">
|
|
|
|
<!-- Command line arguments = -doclet jdiff.JDiff -docletpath /home/gkesavan/release-0.20.0/build/ivy/lib/Hadoop/jdiff/jdiff-1.0.9.jar:/home/gkesavan/release-0.20.0/build/ivy/lib/Hadoop/jdiff/xerces-1.4.4.jar -classpath /home/gkesavan/release-0.20.0/build/classes:/home/gkesavan/release-0.20.0/lib/commons-cli-2.0-SNAPSHOT.jar:/home/gkesavan/release-0.20.0/lib/hsqldb-1.8.0.10.jar:/home/gkesavan/release-0.20.0/lib/jsp-2.1/jsp-2.1.jar:/home/gkesavan/release-0.20.0/lib/jsp-2.1/jsp-api-2.1.jar:/home/gkesavan/release-0.20.0/lib/kfs-0.2.2.jar:/home/gkesavan/release-0.20.0/conf:/home/gkesavan/.ivy2/cache/commons-logging/commons-logging/jars/commons-logging-1.0.4.jar:/home/gkesavan/.ivy2/cache/log4j/log4j/jars/log4j-1.2.15.jar:/home/gkesavan/.ivy2/cache/commons-httpclient/commons-httpclient/jars/commons-httpclient-3.0.1.jar:/home/gkesavan/.ivy2/cache/commons-codec/commons-codec/jars/commons-codec-1.3.jar:/home/gkesavan/.ivy2/cache/xmlenc/xmlenc/jars/xmlenc-0.52.jar:/home/gkesavan/.ivy2/cache/net.java.dev.jets3t/jets3t/jars/jets3t-0.6.1.jar:/home/gkesavan/.ivy2/cache/commons-net/commons-net/jars/commons-net-1.4.1.jar:/home/gkesavan/.ivy2/cache/org.mortbay.jetty/servlet-api-2.5/jars/servlet-api-2.5-6.1.14.jar:/home/gkesavan/.ivy2/cache/oro/oro/jars/oro-2.0.8.jar:/home/gkesavan/.ivy2/cache/org.mortbay.jetty/jetty/jars/jetty-6.1.14.jar:/home/gkesavan/.ivy2/cache/org.mortbay.jetty/jetty-util/jars/jetty-util-6.1.14.jar:/home/gkesavan/.ivy2/cache/tomcat/jasper-runtime/jars/jasper-runtime-5.5.12.jar:/home/gkesavan/.ivy2/cache/tomcat/jasper-compiler/jars/jasper-compiler-5.5.12.jar:/home/gkesavan/.ivy2/cache/commons-el/commons-el/jars/commons-el-1.0.jar:/home/gkesavan/.ivy2/cache/junit/junit/jars/junit-3.8.1.jar:/home/gkesavan/.ivy2/cache/commons-logging/commons-logging-api/jars/commons-logging-api-1.0.4.jar:/home/gkesavan/.ivy2/cache/org.slf4j/slf4j-api/jars/slf4j-api-1.4.3.jar:/home/gkesavan/.ivy2/cache/org.eclipse.jdt/core/jars/core-3.1.1.jar:/home/gkesavan/.ivy2/cache/org.slf4j/slf
4j-log4j12/jars/slf4j-log4j12-1.4.3.jar:/home/gkesavan/.ivy2/cache/jdiff/jdiff/jars/jdiff-1.0.9.jar:/home/gkesavan/.ivy2/cache/xerces/xerces/jars/xerces-1.4.4.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-launcher.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-resolver.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-starteam.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-netrexx.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-testutil.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-jai.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-swing.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-jmf.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-bcel.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-jdepend.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-jsch.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-bsf.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-antlr.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-weblogic.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-junit.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-log4j.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/xercesImpl.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-oro.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-trax.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-nodeps.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-commons-logging.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-regexp.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-stylebook.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-javamail.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-commons-net.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/xml-apis.jar:/home/gkesavan/tools/jdk1.6.0_07-32bit/lib/tools.jar -sourcepath /home/gkesavan/release-0.20.0/src/mapred:/home/gkesavan/release-0.20.0/src/tools -apidir /home/gkesavan/release-0.20.0/lib/jdiff -apiname hadoop-mapred 0.20.0 -->
|
|
<package name="org.apache.hadoop.mapred">
|
|
<!-- start class org.apache.hadoop.mapred.ClusterStatus -->
|
|
<class name="ClusterStatus" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<method name="getTaskTrackers" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of task trackers in the cluster.
|
|
|
|
@return the number of task trackers in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getActiveTrackerNames" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the names of active task trackers in the cluster.
|
|
|
|
@return the active task trackers in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getBlacklistedTrackerNames" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the names of blacklisted task trackers in the cluster.
|
|
|
|
@return the blacklisted task trackers in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getBlacklistedTrackers" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of blacklisted task trackers in the cluster.
|
|
|
|
@return the number of blacklisted task trackers in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTTExpiryInterval" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the tasktracker expiry interval for the cluster
|
|
@return the expiry interval in msec]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of currently running map tasks in the cluster.
|
|
|
|
@return the number of currently running map tasks in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReduceTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of currently running reduce tasks in the cluster.
|
|
|
|
@return the number of currently running reduce tasks in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxMapTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the maximum capacity for running map tasks in the cluster.
|
|
|
|
@return the maximum capacity for running map tasks in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxReduceTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the maximum capacity for running reduce tasks in the cluster.
|
|
|
|
@return the maximum capacity for running reduce tasks in the cluster.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobTrackerState" return="org.apache.hadoop.mapred.JobTracker.State"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the current state of the <code>JobTracker</code>,
|
|
as {@link JobTracker.State}
|
|
|
|
@return the current state of the <code>JobTracker</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUsedMemory" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the total heap memory used by the <code>JobTracker</code>
|
|
|
|
@return the size of heap memory used by the <code>JobTracker</code>]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxMemory" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the maximum configured heap memory that can be used by the <code>JobTracker</code>
|
|
|
|
@return the configured size of max heap memory that can be used by the <code>JobTracker</code>]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Status information on the current state of the Map-Reduce cluster.
|
|
|
|
<p><code>ClusterStatus</code> provides clients with information such as:
|
|
<ol>
|
|
<li>
|
|
Size of the cluster.
|
|
</li>
|
|
<li>
|
|
Name of the trackers.
|
|
</li>
|
|
<li>
|
|
Task capacity of the cluster.
|
|
</li>
|
|
<li>
|
|
The number of currently running map & reduce tasks.
|
|
</li>
|
|
<li>
|
|
State of the <code>JobTracker</code>.
|
|
</li>
|
|
</ol></p>
|
|
|
|
<p>Clients can query for the latest <code>ClusterStatus</code>, via
|
|
{@link JobClient#getClusterStatus()}.</p>
|
|
|
|
@see JobClient]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.ClusterStatus -->
|
|
<!-- start class org.apache.hadoop.mapred.Counters -->
|
|
<class name="Counters" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.Counters} instead.">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<implements name="java.lang.Iterable"/>
|
|
<constructor name="Counters"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getGroupNames" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the names of all counter classes.
|
|
@return Set of counter names.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="iterator" return="java.util.Iterator"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getGroup" return="org.apache.hadoop.mapred.Counters.Group"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="groupName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Returns the named counter group, or an empty group if there is none
|
|
with the specified name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Enum"/>
|
|
<doc>
|
|
<![CDATA[Find the counter for the given enum. The same enum will always return the
|
|
same counter.
|
|
@param key the counter key
|
|
@return the matching counter object]]>
|
|
</doc>
|
|
</method>
|
|
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="group" type="java.lang.String"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Find a counter given the group and the name.
|
|
@param group the name of the group
|
|
@param name the internal name of the counter
|
|
@return the counter for that name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="deprecated, no comment">
|
|
<param name="group" type="java.lang.String"/>
|
|
<param name="id" type="int"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Find a counter by using strings
|
|
@param group the name of the group
|
|
@param id the id of the counter within the group (0 to N-1)
|
|
@param name the internal name of the counter
|
|
@return the counter for that name
|
|
@deprecated]]>
|
|
</doc>
|
|
</method>
|
|
<method name="incrCounter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Enum"/>
|
|
<param name="amount" type="long"/>
|
|
<doc>
|
|
<![CDATA[Increments the specified counter by the specified amount, creating it if
|
|
it didn't already exist.
|
|
@param key identifies a counter
|
|
@param amount amount by which counter is to be incremented]]>
|
|
</doc>
|
|
</method>
|
|
<method name="incrCounter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="group" type="java.lang.String"/>
|
|
<param name="counter" type="java.lang.String"/>
|
|
<param name="amount" type="long"/>
|
|
<doc>
|
|
<![CDATA[Increments the specified counter by the specified amount, creating it if
|
|
it didn't already exist.
|
|
@param group the name of the group
|
|
@param counter the internal name of the counter
|
|
@param amount amount by which counter is to be incremented]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Enum"/>
|
|
<doc>
|
|
<![CDATA[Returns current value of the specified counter, or 0 if the counter
|
|
does not exist.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="incrAllCounters"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="org.apache.hadoop.mapred.Counters"/>
|
|
<doc>
|
|
<![CDATA[Increments multiple counters by their amounts in another Counters
|
|
instance.
|
|
@param other the other Counters instance]]>
|
|
</doc>
|
|
</method>
|
|
<method name="sum" return="org.apache.hadoop.mapred.Counters"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="a" type="org.apache.hadoop.mapred.Counters"/>
|
|
<param name="b" type="org.apache.hadoop.mapred.Counters"/>
|
|
<doc>
|
|
<![CDATA[Convenience method for computing the sum of two sets of counters.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="size" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the total number of counters, by summing the number of counters
|
|
in each group.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Write the set of groups.
|
|
The external format is:
|
|
#groups (groupName group)*
|
|
|
|
i.e. the number of groups followed by 0 or more groups, where each
|
|
group is of the form:
|
|
|
|
groupDisplayName #counters (false | true counter)*
|
|
|
|
where each counter is of the form:
|
|
|
|
name (false | true displayName) value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read a set of groups.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="log"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="log" type="org.apache.commons.logging.Log"/>
|
|
<doc>
|
|
<![CDATA[Logs the current counter values.
|
|
@param log The log to use.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return textual representation of the counter values.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="makeCompactString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Convert a counters object into a single line that is easy to parse.
|
|
@return the string with "name=value" for each counter and separated by ","]]>
|
|
</doc>
|
|
</method>
|
|
<method name="makeEscapedCompactString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Represent the counter in a textual format that can be converted back to
|
|
its object form
|
|
@return the string in the following format
|
|
{(groupname)(group-displayname)[(countername)(displayname)(value)][][]}{}{}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="fromEscapedCompactString" return="org.apache.hadoop.mapred.Counters"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="compactString" type="java.lang.String"/>
|
|
<exception name="ParseException" type="java.text.ParseException"/>
|
|
<doc>
|
|
<![CDATA[Convert a stringified counter representation into a counter object. Note
|
|
that the counter can be recovered if it is stringified using
|
|
{@link #makeEscapedCompactString()}.
|
|
@return a Counters object]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="obj" type="java.lang.Object"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A set of named counters.
|
|
|
|
<p><code>Counters</code> represent global counters, defined either by the
|
|
Map-Reduce framework or applications. Each <code>Counter</code> can be of
|
|
any {@link Enum} type.</p>
|
|
|
|
<p><code>Counters</code> are bunched into {@link Group}s, each comprising of
|
|
counters from a particular <code>Enum</code> class.</p>
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.Counters} instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.Counters -->
|
|
<!-- start class org.apache.hadoop.mapred.Counters.Counter -->
|
|
<class name="Counters.Counter" extends="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="setDisplayName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="newName" type="java.lang.String"/>
|
|
</method>
|
|
<method name="makeEscapedCompactString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the compact stringified version of the counter in the format
|
|
[(actual-name)(display-name)(value)]]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[What is the current value of this counter?
|
|
@return the current value]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A counter record, comprising its name and value.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.Counters.Counter -->
|
|
<!-- start class org.apache.hadoop.mapred.Counters.Group -->
|
|
<class name="Counters.Group" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<implements name="java.lang.Iterable"/>
|
|
<method name="getName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns raw name of the group. This is the name of the enum class
|
|
for this group of counters.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDisplayName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns localized name of the group. This is the same as getName() by
|
|
default, but different if an appropriate ResourceBundle is found.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setDisplayName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="displayName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the display name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="makeEscapedCompactString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the compact stringified version of the group in the format
|
|
{(actual-name)(display-name)[][][]} where [] are compact strings for the
|
|
counters within.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="obj" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Checks for (content) equality of Groups]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="counterName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Returns the value of the specified counter, or 0 if the counter does
|
|
not exist.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="use {@link #getCounter(String)} instead">
|
|
<param name="id" type="int"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Get the counter for the given id and create it if it doesn't exist.
|
|
@param id the numeric id of the counter within the group
|
|
@param name the internal counter name
|
|
@return the counter
|
|
@deprecated use {@link #getCounter(String)} instead]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounterForName" return="org.apache.hadoop.mapred.Counters.Counter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Get the counter for the given name and create it if it doesn't exist.
|
|
@param name the internal counter name
|
|
@return the counter]]>
|
|
</doc>
|
|
</method>
|
|
<method name="size" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the number of counters in this group.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="iterator" return="java.util.Iterator"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>Group</code> of counters, comprising of counters from a particular
|
|
counter {@link Enum} class.
|
|
|
|
<p><code>Group</code> handles localization of the class name and the
|
|
counter names.</p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.Counters.Group -->
|
|
<!-- start class org.apache.hadoop.mapred.DefaultJobHistoryParser -->
|
|
<class name="DefaultJobHistoryParser" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="DefaultJobHistoryParser"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="parseJobTasks"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobHistoryFile" type="java.lang.String"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobHistory.JobInfo"/>
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Populates a JobInfo object from the job's history log file.
|
|
@param jobHistoryFile history file for this job.
|
|
@param job a precreated JobInfo object, should be non-null.
|
|
@param fs FileSystem where historyFile is present.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Default parser for job history files. It creates object model from
|
|
job history file.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.DefaultJobHistoryParser -->
|
|
<!-- start class org.apache.hadoop.mapred.FileAlreadyExistsException -->
|
|
<class name="FileAlreadyExistsException" extends="java.io.IOException"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FileAlreadyExistsException"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="FileAlreadyExistsException" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[Used when target file already exists for any operation and
|
|
is not configured to be overwritten.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.FileAlreadyExistsException -->
|
|
<!-- start class org.apache.hadoop.mapred.FileInputFormat -->
|
|
<class name="FileInputFormat" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}
|
|
instead.">
|
|
<implements name="org.apache.hadoop.mapred.InputFormat"/>
|
|
<constructor name="FileInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setMinSplitSize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="minSplitSize" type="long"/>
|
|
</method>
|
|
<method name="isSplitable" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="filename" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Is the given filename splitable? Usually, true, but if the file is
|
|
stream compressed, it will not be.
|
|
|
|
<code>FileInputFormat</code> implementations can override this and return
|
|
<code>false</code> to ensure that individual input files are never split-up
|
|
so that {@link Mapper}s process entire files.
|
|
|
|
@param fs the file system that the file is on
|
|
@param filename the file name to check
|
|
@return is this file splitable?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setInputPathFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="filter" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job.
|
|
|
|
@param filter the PathFilter class to use for filtering the input paths.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get a PathFilter instance of the filter set for the input paths.
|
|
|
|
@return the PathFilter instance set for the job, NULL if none has been set.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[List input directories.
|
|
Subclasses may override to, e.g., select only files matching a regular
|
|
expression.
|
|
|
|
@param job the job to list input paths for
|
|
@return array of FileStatus objects
|
|
@throws IOException if zero items.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="numSplits" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Splits files returned by {@link #listStatus(JobConf)} when
|
|
they're too big.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="computeSplitSize" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="goalSize" type="long"/>
|
|
<param name="minSize" type="long"/>
|
|
<param name="blockSize" type="long"/>
|
|
</method>
|
|
<method name="getBlockIndex" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/>
|
|
<param name="offset" type="long"/>
|
|
</method>
|
|
<method name="setInputPaths"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="commaSeparatedPaths" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Sets the given comma separated paths as the list of inputs
|
|
for the map-reduce job.
|
|
|
|
@param conf Configuration of the job
|
|
@param commaSeparatedPaths Comma separated paths to be set as
|
|
the list of inputs for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addInputPaths"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="commaSeparatedPaths" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Add the given comma separated paths to the list of inputs for
|
|
the map-reduce job.
|
|
|
|
@param conf The configuration of the job
|
|
@param commaSeparatedPaths Comma separated paths to be added to
|
|
the list of inputs for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInputPaths"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/>
|
|
<doc>
|
|
<![CDATA[Set the array of {@link Path}s as the list of inputs
|
|
for the map-reduce job.
|
|
|
|
@param conf Configuration of the job.
|
|
@param inputPaths the {@link Path}s of the input directories/files
|
|
for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addInputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job.
|
|
|
|
@param conf The configuration of the job
|
|
@param path {@link Path} to be added to the list of inputs for
|
|
the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputPaths" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the list of input {@link Path}s for the map-reduce job.
|
|
|
|
@param conf The configuration of the job
|
|
@return the list of input {@link Path}s for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplitHosts" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/>
|
|
<param name="offset" type="long"/>
|
|
<param name="splitSize" type="long"/>
|
|
<param name="clusterMap" type="org.apache.hadoop.net.NetworkTopology"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This function identifies and returns the hosts that contribute
|
|
most for a given split. For calculating the contribution, rack
|
|
locality is treated on par with host locality, so hosts from racks
|
|
that contribute the most are preferred over hosts on racks that
|
|
contribute less
|
|
@param blkLocations The list of block locations
|
|
@param offset
|
|
@param splitSize
|
|
@return array of hosts that contribute most to this split
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A base class for file-based {@link InputFormat}.
|
|
|
|
<p><code>FileInputFormat</code> is the base class for all file-based
|
|
<code>InputFormat</code>s. This provides a generic implementation of
|
|
{@link #getSplits(JobConf, int)}.
|
|
Subclasses of <code>FileInputFormat</code> can also override the
|
|
{@link #isSplitable(FileSystem, Path)} method to ensure input-files are
|
|
not split-up and are processed as a whole by {@link Mapper}s.
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}
|
|
instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.FileInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.FileOutputCommitter -->
|
|
<class name="FileOutputCommitter" extends="org.apache.hadoop.mapred.OutputCommitter"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FileOutputCommitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setupJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="cleanupJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setupTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="commitTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="abortTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="needsTaskCommit" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="TEMP_DIR_NAME" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Temporary directory name]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[An {@link OutputCommitter} that commits files specified
|
|
in job output directory i.e. ${mapred.output.dir}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.FileOutputCommitter -->
|
|
<!-- start class org.apache.hadoop.mapred.FileOutputFormat -->
|
|
<class name="FileOutputFormat" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
|
|
<constructor name="FileOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setCompressOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="compress" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the output of the job is compressed.
|
|
@param conf the {@link JobConf} to modify
|
|
@param compress should the output of the job be compressed?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCompressOutput" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Is the job output compressed?
|
|
@param conf the {@link JobConf} to look in
|
|
@return <code>true</code> if the job output should be compressed,
|
|
<code>false</code> otherwise]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputCompressorClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="codecClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs.
|
|
@param conf the {@link JobConf} to modify
|
|
@param codecClass the {@link CompressionCodec} to be used to
|
|
compress the job outputs]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputCompressorClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="defaultValue" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link CompressionCodec} for compressing the job outputs.
|
|
@param conf the {@link JobConf} to look in
|
|
@param defaultValue the {@link CompressionCodec} to return if not set
|
|
@return the {@link CompressionCodec} to be used to compress the
|
|
job outputs
|
|
@throws IllegalArgumentException if the class was specified, but not found]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="FileAlreadyExistsException" type="org.apache.hadoop.mapred.FileAlreadyExistsException"/>
|
|
<exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setOutputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="outputDir" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Path} of the output directory for the map-reduce job.
|
|
|
|
@param conf The configuration of the job.
|
|
@param outputDir the {@link Path} of the output directory for
|
|
the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Path} to the output directory for the map-reduce job.
|
|
|
|
@return the {@link Path} to the output directory for the map-reduce job.
|
|
@see FileOutputFormat#getWorkOutputPath(JobConf)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Path} to the task's temporary output directory
|
|
for the map-reduce job
|
|
|
|
<h4 id="SideEffectFiles">Tasks' Side-Effect Files</h4>
|
|
|
|
<p><i>Note:</i> The following is valid only if the {@link OutputCommitter}
|
|
is {@link FileOutputCommitter}. If <code>OutputCommitter</code> is not
|
|
a <code>FileOutputCommitter</code>, the task's temporary output
|
|
directory is same as {@link #getOutputPath(JobConf)} i.e.
|
|
<tt>${mapred.output.dir}</tt></p>
|
|
|
|
<p>Some applications need to create/write-to side-files, which differ from
|
|
the actual job-outputs.</p>
|
|
|
|
<p>In such cases there could be issues with 2 instances of the same TIP
|
|
(running simultaneously e.g. speculative tasks) trying to open/write-to the
|
|
same file (path) on HDFS. Hence the application-writer will have to pick
|
|
unique names per task-attempt (e.g. using the attemptid, say
|
|
<tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p>
|
|
|
|
<p>To get around this the Map-Reduce framework helps the application-writer
|
|
out by maintaining a special
|
|
<tt>${mapred.output.dir}/_temporary/_${taskid}</tt>
|
|
sub-directory for each task-attempt on HDFS where the output of the
|
|
task-attempt goes. On successful completion of the task-attempt the files
|
|
in the <tt>${mapred.output.dir}/_temporary/_${taskid}</tt> (only)
|
|
are <i>promoted</i> to <tt>${mapred.output.dir}</tt>. Of course, the
|
|
framework discards the sub-directory of unsuccessful task-attempts. This
|
|
is completely transparent to the application.</p>
|
|
|
|
<p>The application-writer can take advantage of this by creating any
|
|
side-files required in <tt>${mapred.work.output.dir}</tt> during execution
|
|
of the reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the
|
|
framework will move them out similarly - thus the application-writer doesn't have to pick
|
|
unique paths per task-attempt.</p>
|
|
|
|
<p><i>Note</i>: the value of <tt>${mapred.work.output.dir}</tt> during
|
|
execution of a particular task-attempt is actually
|
|
<tt>${mapred.output.dir}/_temporary/_${taskid}</tt>, and this value is
|
|
set by the map-reduce framework. So, just create any side-files in the
|
|
path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce
|
|
task to take advantage of this feature.</p>
|
|
|
|
<p>The entire discussion holds true for maps of jobs with
|
|
reducer=NONE (i.e. 0 reduces) since output of the map, in that case,
|
|
goes directly to HDFS.</p>
|
|
|
|
@return the {@link Path} to the task's temporary output directory
|
|
for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskOutputPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Helper function to create the task's temporary output directory and
|
|
return the path to the task's output file.
|
|
|
|
@param conf job-configuration
|
|
@param name temporary task-output filename
|
|
@return path to the task's temporary output file
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUniqueName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Helper function to generate a name that is unique for the task.
|
|
|
|
<p>The generated name can be used to create custom files from within the
|
|
different tasks for the job, the names for different tasks will not collide
|
|
with each other.</p>
|
|
|
|
<p>The given name is postfixed with the task type, 'm' for maps, 'r' for
|
|
reduces and the task partition number. For example, given a name 'test'
|
|
running on the first map of the job, the generated name will be
|
|
'test-m-00000'.</p>
|
|
|
|
@param conf the configuration for the job.
|
|
@param name the name to make unique.
|
|
@return a unique name across all tasks of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPathForCustomFile" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Helper function to generate a {@link Path} for a file that is unique for
|
|
the task within the job output directory.
|
|
|
|
<p>The path can be used to create custom files from within the map and
|
|
reduce tasks. The path name will be unique for each task. The path parent
|
|
will be the job output directory.</p>
|
|
|
|
<p>This method uses the {@link #getUniqueName} method to make the file name
|
|
unique for the task.</p>
|
|
|
|
@param conf the configuration for the job.
|
|
@param name the name for the file.
|
|
@return a unique path across all tasks of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A base class for {@link OutputFormat}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.FileOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.FileSplit -->
|
|
<class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.FileSplit}
|
|
instead.">
|
|
<implements name="org.apache.hadoop.mapred.InputSplit"/>
|
|
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, org.apache.hadoop.mapred.JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="deprecated, no comment">
|
|
<doc>
|
|
<![CDATA[Constructs a split.
|
|
@deprecated
|
|
@param file the file name
|
|
@param start the position of the first byte in the file to process
|
|
@param length the number of bytes in the file to process]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a split with host information
|
|
|
|
@param file the file name
|
|
@param start the position of the first byte in the file to process
|
|
@param length the number of bytes in the file to process
|
|
@param hosts the list of hosts containing the block, possibly null]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The file containing this split's data.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getStart" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The position of the first byte in the file to process.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The number of bytes in the file to process.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A section of an input file. Returned by {@link
|
|
InputFormat#getSplits(JobConf, int)} and passed to
|
|
{@link InputFormat#getRecordReader(InputSplit,JobConf,Reporter)}.
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileSplit}
|
|
instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.FileSplit -->
|
|
<!-- start class org.apache.hadoop.mapred.ID -->
|
|
<class name="ID" extends="org.apache.hadoop.mapreduce.ID"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ID" type="int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[constructs an ID object from the given int]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="ID"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[A general identifier, which internally stores the id
|
|
as an integer. This is the super class of {@link JobID},
|
|
{@link TaskID} and {@link TaskAttemptID}.
|
|
|
|
@see JobID
|
|
@see TaskID
|
|
@see TaskAttemptID]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.ID -->
|
|
<!-- start interface org.apache.hadoop.mapred.InputFormat -->
|
|
<interface name="InputFormat" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.InputFormat} instead.">
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="numSplits" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Logically split the set of input files for the job.
|
|
|
|
<p>Each {@link InputSplit} is then assigned to an individual {@link Mapper}
|
|
for processing.</p>
|
|
|
|
<p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the
|
|
input files are not physically split into chunks. For e.g. a split could
|
|
be an <i>&lt;input-file-path, start, offset&gt;</i> tuple.</p>
|
|
|
|
@param job job configuration.
|
|
@param numSplits the desired number of splits, a hint.
|
|
@return an array of {@link InputSplit}s for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link RecordReader} for the given {@link InputSplit}.
|
|
|
|
<p>It is the responsibility of the <code>RecordReader</code> to respect
|
|
record boundaries while processing the logical split to present a
|
|
record-oriented view to the individual task.</p>
|
|
|
|
@param split the {@link InputSplit}
|
|
@param job the job that this split belongs to
|
|
@return a {@link RecordReader}]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>InputFormat</code> describes the input-specification for a
|
|
Map-Reduce job.
|
|
|
|
<p>The Map-Reduce framework relies on the <code>InputFormat</code> of the
|
|
job to:</p>
|
|
<ol>
|
|
<li>
|
|
Validate the input-specification of the job.</li>
|
|
<li>
|
|
Split-up the input file(s) into logical {@link InputSplit}s, each of
|
|
which is then assigned to an individual {@link Mapper}.
|
|
</li>
|
|
<li>
|
|
Provide the {@link RecordReader} implementation to be used to glean
|
|
input records from the logical <code>InputSplit</code> for processing by
|
|
the {@link Mapper}.
|
|
</li>
|
|
</ol>
|
|
|
|
<p>The default behavior of file-based {@link InputFormat}s, typically
|
|
sub-classes of {@link FileInputFormat}, is to split the
|
|
input into <i>logical</i> {@link InputSplit}s based on the total size, in
|
|
bytes, of the input files. However, the {@link FileSystem} blocksize of
|
|
the input files is treated as an upper bound for input splits. A lower bound
|
|
on the split size can be set via
|
|
<a href="{@docRoot}/../mapred-default.html#mapred.min.split.size">
|
|
mapred.min.split.size</a>.</p>
|
|
|
|
<p>Clearly, logical splits based on input-size are insufficient for many
|
|
applications since record boundaries are to be respected. In such cases, the
|
|
application has to also implement a {@link RecordReader} on whom lies the
|
|
responsibility to respect record-boundaries and present a record-oriented
|
|
view of the logical <code>InputSplit</code> to the individual task.</p>
|
|
|
|
@see InputSplit
|
|
@see RecordReader
|
|
@see JobClient
|
|
@see FileInputFormat
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.InputFormat} instead.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.InputFormat -->
|
|
<!-- start interface org.apache.hadoop.mapred.InputSplit -->
|
|
<interface name="InputSplit" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.InputSplit} instead.">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the total number of bytes in the data of the <code>InputSplit</code>.
|
|
|
|
@return the number of bytes in the input split.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the list of hostnames where the input split is located.
|
|
|
|
@return list of hostnames where data of the <code>InputSplit</code> is
|
|
located as an array of <code>String</code>s.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>InputSplit</code> represents the data to be processed by an
|
|
individual {@link Mapper}.
|
|
|
|
<p>Typically, it presents a byte-oriented view on the input and it is the
|
|
responsibility of {@link RecordReader} of the job to process this and present
|
|
a record-oriented view.
|
|
|
|
@see InputFormat
|
|
@see RecordReader
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.InputSplit} instead.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.InputSplit -->
|
|
<!-- start class org.apache.hadoop.mapred.InvalidFileTypeException -->
|
|
<class name="InvalidFileTypeException" extends="java.io.IOException"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InvalidFileTypeException"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="InvalidFileTypeException" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[Used when file type differs from the desired file type, like
|
|
getting a file when a directory is expected. Or a wrong file type.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.InvalidFileTypeException -->
|
|
<!-- start class org.apache.hadoop.mapred.InvalidInputException -->
|
|
<class name="InvalidInputException" extends="java.io.IOException"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InvalidInputException" type="java.util.List"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create the exception with the given list.
|
|
@param probs the list of problems to report. This list is not copied.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getProblems" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the complete list of the problems reported.
|
|
@return the list of problems, which must not be modified]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMessage" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get a summary message of the problems found.
|
|
@return the concatenated messages from all of the problems.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class wraps a list of problems with the input, so that the user
|
|
can get a list of problems together instead of finding and fixing them one
|
|
by one.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.InvalidInputException -->
|
|
<!-- start class org.apache.hadoop.mapred.InvalidJobConfException -->
|
|
<class name="InvalidJobConfException" extends="java.io.IOException"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InvalidJobConfException"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="InvalidJobConfException" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[This exception is thrown when jobconf misses some mandatory attributes
|
|
or value of some attributes is invalid.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.InvalidJobConfException -->
|
|
<!-- start class org.apache.hadoop.mapred.IsolationRunner -->
|
|
<class name="IsolationRunner" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="IsolationRunner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Run a single task
|
|
@param args the first argument is the task directory]]>
|
|
</doc>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.IsolationRunner -->
|
|
<!-- start class org.apache.hadoop.mapred.JobClient -->
|
|
<class name="JobClient" extends="org.apache.hadoop.conf.Configured"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.MRConstants"/>
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="JobClient"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a job client.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobClient" type="org.apache.hadoop.mapred.JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Build a job client with the given {@link JobConf}, and connect to the
|
|
default {@link JobTracker}.
|
|
|
|
@param conf the job configuration.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobClient" type="java.net.InetSocketAddress, org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Build a job client, connect to the indicated job tracker.
|
|
|
|
@param jobTrackAddr the job tracker to connect to.
|
|
@param conf configuration.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="init"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Connect to the default {@link JobTracker}.
|
|
@param conf the job configuration.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close the <code>JobClient</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getFs" return="org.apache.hadoop.fs.FileSystem"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get a filesystem handle. We need this to prepare jobs
|
|
for submission to the MapReduce system.
|
|
|
|
@return the filesystem handle.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="submitJob" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobFile" type="java.lang.String"/>
|
|
<exception name="FileNotFoundException" type="java.io.FileNotFoundException"/>
|
|
<exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Submit a job to the MR system.
|
|
|
|
This returns a handle to the {@link RunningJob} which can be used to track
|
|
the running-job.
|
|
|
|
@param jobFile the job configuration.
|
|
@return a handle to the {@link RunningJob} which can be used to track the
|
|
running-job.
|
|
@throws FileNotFoundException
|
|
@throws InvalidJobConfException
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="submitJob" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="FileNotFoundException" type="java.io.FileNotFoundException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Submit a job to the MR system.
|
|
This returns a handle to the {@link RunningJob} which can be used to track
|
|
the running-job.
|
|
|
|
@param job the job configuration.
|
|
@return a handle to the {@link RunningJob} which can be used to track the
|
|
running-job.
|
|
@throws FileNotFoundException
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="submitJobInternal" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="FileNotFoundException" type="java.io.FileNotFoundException"/>
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Internal method for submitting jobs to the system.
|
|
@param job the configuration to submit
|
|
@return a proxy object for the running job
|
|
@throws FileNotFoundException
|
|
@throws ClassNotFoundException
|
|
@throws InterruptedException
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isJobDirValid" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobDirPath" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Checks if the job directory is clean and has all the required components
|
|
for (re) starting the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJob" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get a {@link RunningJob} object to track an ongoing job. Returns
|
|
null if the id does not correspond to any known job.
|
|
|
|
@param jobid the jobid of the job.
|
|
@return the {@link RunningJob} handle to track the job, null if the
|
|
<code>jobid</code> doesn't correspond to any known job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJob" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Applications should rather use {@link #getJob(JobID)}.">
|
|
<param name="jobid" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@deprecated Applications should rather use {@link #getJob(JobID)}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the information of the current state of the map tasks of a job.
|
|
|
|
@param jobId the job to query.
|
|
@return the list of all of the map tips.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Applications should rather use {@link #getMapTaskReports(JobID)}">
|
|
<param name="jobId" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@deprecated Applications should rather use {@link #getMapTaskReports(JobID)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the information of the current state of the reduce tasks of a job.
|
|
|
|
@param jobId the job to query.
|
|
@return the list of all of the reduce tips.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCleanupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the information of the current state of the cleanup tasks of a job.
|
|
|
|
@param jobId the job to query.
|
|
@return the list of all of the cleanup tips.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSetupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the information of the current state of the setup tasks of a job.
|
|
|
|
@param jobId the job to query.
|
|
@return the list of all of the setup tips.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Applications should rather use {@link #getReduceTaskReports(JobID)}">
|
|
<param name="jobId" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@deprecated Applications should rather use {@link #getReduceTaskReports(JobID)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="displayTasks"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<param name="type" type="java.lang.String"/>
|
|
<param name="state" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Display the information about a job's tasks, of a particular type and
|
|
in a particular state
|
|
|
|
@param jobId the ID of the job
|
|
@param type the type of the task (map/reduce/setup/cleanup)
|
|
@param state the state of the task
|
|
(pending/running/completed/failed/killed)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get status information about the Map-Reduce cluster.
|
|
|
|
@return the status information about the Map-Reduce cluster as an object
|
|
of {@link ClusterStatus}.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="detailed" type="boolean"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get status information about the Map-Reduce cluster.
|
|
|
|
@param detailed if true then get a detailed status including the
|
|
tracker names
|
|
@return the status information about the Map-Reduce cluster as an object
|
|
of {@link ClusterStatus}.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="jobsToComplete" return="org.apache.hadoop.mapred.JobStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the jobs that are not completed and not failed.
|
|
|
|
@return array of {@link JobStatus} for the running/to-be-run jobs.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getAllJobs" return="org.apache.hadoop.mapred.JobStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the jobs that are submitted.
|
|
|
|
@return array of {@link JobStatus} for the submitted jobs.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="runJob" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Utility that submits a job, then polls for progress until the job is
|
|
complete.
|
|
|
|
@param job the job configuration.
|
|
@throws IOException if the job fails]]>
|
|
</doc>
|
|
</method>
|
|
<method name="monitorAndPrintJob" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.RunningJob"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Monitor a job and print status in real-time as progress is made and tasks
|
|
fail.
|
|
@param conf the job's configuration
|
|
@param job the job to track
|
|
@return true if the job succeeded
|
|
@throws IOException if communication to the JobTracker fails]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskOutputFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/>
|
|
<doc>
|
|
<![CDATA[Sets the output filter for tasks. Only those tasks are printed whose
|
|
output matches the filter.
|
|
@param newValue task filter.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the task output filter out of the JobConf.
|
|
|
|
@param job the JobConf to examine.
|
|
@return the filter level.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskOutputFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/>
|
|
<doc>
|
|
<![CDATA[Modify the JobConf to set the task output filter.
|
|
|
|
@param job the JobConf to modify.
|
|
@param newValue the value to set.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns task output filter.
|
|
@return task filter.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="argv" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<method name="getDefaultMaps" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get status information about the max available Maps in the cluster.
|
|
|
|
@return the max available Maps in the cluster
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDefaultReduces" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get status information about the max available Reduces in the cluster.
|
|
|
|
@return the max available Reduces in the cluster
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSystemDir" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Grab the jobtracker system directory path where job-specific files are to be placed.
|
|
|
|
@return the system directory where job-specific files are to be placed.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Return an array of queue information objects about all the Job Queues
|
|
configured.
|
|
|
|
@return Array of JobQueueInfo objects
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobsFromQueue" return="org.apache.hadoop.mapred.JobStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="queueName" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets all the jobs which were added to a particular Job Queue
|
|
|
|
@param queueName name of the Job Queue
|
|
@return Array of jobs present in the job queue
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getQueueInfo" return="org.apache.hadoop.mapred.JobQueueInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="queueName" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets the queue information associated to a particular Job Queue
|
|
|
|
@param queueName name of the job queue.
|
|
@return Queue information associated to particular queue.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="argv" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>JobClient</code> is the primary interface for the user-job to interact
|
|
with the {@link JobTracker}.
|
|
|
|
<code>JobClient</code> provides facilities to submit jobs, track their
|
|
progress, access component-tasks' reports/logs, get the Map-Reduce cluster
|
|
status information etc.
|
|
|
|
<p>The job submission process involves:
|
|
<ol>
|
|
<li>
|
|
Checking the input and output specifications of the job.
|
|
</li>
|
|
<li>
|
|
Computing the {@link InputSplit}s for the job.
|
|
</li>
|
|
<li>
|
|
Setting up the requisite accounting information for the {@link DistributedCache}
|
|
of the job, if necessary.
|
|
</li>
|
|
<li>
|
|
Copying the job's jar and configuration to the map-reduce system directory
|
|
on the distributed file-system.
|
|
</li>
|
|
<li>
|
|
Submitting the job to the <code>JobTracker</code> and optionally monitoring
|
|
its status.
|
|
</li>
|
|
</ol></p>
|
|
|
|
Normally the user creates the application, describes various facets of the
|
|
job via {@link JobConf} and then uses the <code>JobClient</code> to submit
|
|
the job and monitor its progress.
|
|
|
|
<p>Here is an example on how to use <code>JobClient</code>:</p>
|
|
<p><blockquote><pre>
|
|
// Create a new JobConf
|
|
JobConf job = new JobConf(new Configuration(), MyJob.class);
|
|
|
|
// Specify various job-specific parameters
|
|
job.setJobName("myjob");
|
|
|
|
job.setInputPath(new Path("in"));
|
|
job.setOutputPath(new Path("out"));
|
|
|
|
job.setMapperClass(MyJob.MyMapper.class);
|
|
job.setReducerClass(MyJob.MyReducer.class);
|
|
|
|
// Submit the job, then poll for progress until the job is complete
|
|
JobClient.runJob(job);
|
|
</pre></blockquote></p>
|
|
|
|
<h4 id="JobControl">Job Control</h4>
|
|
|
|
<p>At times clients would chain map-reduce jobs to accomplish complex tasks
|
|
which cannot be done via a single map-reduce job. This is fairly easy since
|
|
the output of the job, typically, goes to distributed file-system and that
|
|
can be used as the input for the next job.</p>
|
|
|
|
<p>However, this also means that the onus on ensuring jobs are complete
|
|
(success/failure) lies squarely on the clients. In such situations the
|
|
various job-control options are:
|
|
<ol>
|
|
<li>
|
|
{@link #runJob(JobConf)} : submits the job and returns only after
|
|
the job has completed.
|
|
</li>
|
|
<li>
|
|
{@link #submitJob(JobConf)} : only submits the job, then poll the
|
|
returned handle to the {@link RunningJob} to query status and make
|
|
scheduling decisions.
|
|
</li>
|
|
<li>
|
|
{@link JobConf#setJobEndNotificationURI(String)} : setup a notification
|
|
on job-completion, thus avoiding polling.
|
|
</li>
|
|
</ol></p>
|
|
|
|
@see JobConf
|
|
@see ClusterStatus
|
|
@see Tool
|
|
@see DistributedCache]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobClient -->
|
|
<!-- start class org.apache.hadoop.mapred.JobClient.TaskStatusFilter -->
|
|
<class name="JobClient.TaskStatusFilter" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<field name="NONE" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="KILLED" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FAILED" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="SUCCEEDED" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="ALL" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobClient.TaskStatusFilter -->
|
|
<!-- start class org.apache.hadoop.mapred.JobConf -->
|
|
<class name="JobConf" extends="org.apache.hadoop.conf.Configuration"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link Configuration} instead">
|
|
<constructor name="JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a map/reduce job configuration.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobConf" type="java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a map/reduce job configuration.
|
|
|
|
@param exampleClass a class whose containing jar is used as the job's jar.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobConf" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a map/reduce job configuration.
|
|
|
|
@param conf a Configuration whose settings will be inherited.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobConf" type="org.apache.hadoop.conf.Configuration, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a map/reduce job configuration.
|
|
|
|
@param conf a Configuration whose settings will be inherited.
|
|
@param exampleClass a class whose containing jar is used as the job's jar.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobConf" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a map/reduce configuration.
|
|
|
|
@param config a Configuration-format XML job description file.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobConf" type="org.apache.hadoop.fs.Path"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a map/reduce configuration.
|
|
|
|
@param config a Configuration-format XML job description file.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobConf" type="boolean"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[A new map/reduce configuration where the behavior of reading from the
|
|
default resources can be turned off.
|
|
<p/>
|
|
If the parameter {@code loadDefaults} is false, the new instance
|
|
will not load resources from the default files.
|
|
|
|
@param loadDefaults specifies whether to load from the default files]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getJar" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user jar for the map-reduce job.
|
|
|
|
@return the user jar for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJar"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jar" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the user jar for the map-reduce job.
|
|
|
|
@param jar the user jar for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJarByClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the job's jar file by finding an example class location.
|
|
|
|
@param cls the example class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocalDirs" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="deleteLocalFiles"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="deleteLocalFiles"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="subdir" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getLocalPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="pathString" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Constructs a local file name. Files are distributed among configured
|
|
local directories.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUser" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the reported username for this job.
|
|
|
|
@return the username]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setUser"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="user" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the reported username for this job.
|
|
|
|
@param user the username for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setKeepFailedTaskFiles"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="keep" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the framework should keep the intermediate files for
|
|
failed tasks.
|
|
|
|
@param keep <code>true</code> if framework should keep the intermediate files
|
|
for failed tasks, <code>false</code> otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getKeepFailedTaskFiles" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Should the temporary files for failed tasks be kept?
|
|
|
|
@return should the files be kept?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setKeepTaskFilesPattern"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="pattern" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set a regular expression for task names that should be kept.
|
|
The regular expression ".*_m_000123_0" would keep the files
|
|
for the first instance of map 123 that ran.
|
|
|
|
@param pattern the java.util.regex.Pattern to match against the
|
|
task names.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getKeepTaskFilesPattern" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the regular expression that is matched against the task names
|
|
to see if we need to keep the files.
|
|
|
|
@return the pattern as a string, if it was set, otherwise null.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setWorkingDirectory"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="dir" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Set the current working directory for the default file system.
|
|
|
|
@param dir the new current working directory.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the current working directory for the default file system.
|
|
|
|
@return the directory name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setNumTasksToExecutePerJvm"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="numTasks" type="int"/>
|
|
<doc>
|
|
<![CDATA[Sets the number of tasks that a spawned task JVM should run
|
|
before it exits.
|
|
@param numTasks the number of tasks to execute; defaults to 1;
|
|
-1 signifies no limit]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumTasksToExecutePerJvm" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the number of tasks that a spawned JVM should execute]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputFormat" return="org.apache.hadoop.mapred.InputFormat"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link InputFormat} implementation for the map-reduce job,
|
|
defaults to {@link TextInputFormat} if not specified explicitly.
|
|
|
|
@return the {@link InputFormat} implementation for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInputFormat"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link InputFormat} implementation for the map-reduce job.
|
|
|
|
@param theClass the {@link InputFormat} implementation for the map-reduce
|
|
job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputFormat" return="org.apache.hadoop.mapred.OutputFormat"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link OutputFormat} implementation for the map-reduce job,
|
|
defaults to {@link TextOutputFormat} if not specified explicitly.
|
|
|
|
@return the {@link OutputFormat} implementation for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapred.OutputCommitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link OutputCommitter} implementation for the map-reduce job,
|
|
defaults to {@link FileOutputCommitter} if not specified explicitly.
|
|
|
|
@return the {@link OutputCommitter} implementation for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputCommitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link OutputCommitter} implementation for the map-reduce job.
|
|
|
|
@param theClass the {@link OutputCommitter} implementation for the map-reduce
|
|
job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputFormat"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link OutputFormat} implementation for the map-reduce job.
|
|
|
|
@param theClass the {@link OutputFormat} implementation for the map-reduce
|
|
job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setCompressMapOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="compress" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Should the map outputs be compressed before transfer?
|
|
Uses the SequenceFile compression.
|
|
|
|
@param compress should the map outputs be compressed?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCompressMapOutput" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Are the outputs of the maps to be compressed?
|
|
|
|
@return <code>true</code> if the outputs of the maps are to be compressed,
|
|
<code>false</code> otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapOutputCompressorClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="codecClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the given class as the {@link CompressionCodec} for the map outputs.
|
|
|
|
@param codecClass the {@link CompressionCodec} class that will compress
|
|
the map outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapOutputCompressorClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="defaultValue" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link CompressionCodec} for compressing the map outputs.
|
|
|
|
@param defaultValue the {@link CompressionCodec} to return if not set
|
|
@return the {@link CompressionCodec} class that should be used to compress the
|
|
map outputs.
|
|
@throws IllegalArgumentException if the class was specified, but not found]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the key class for the map output data. If it is not set, use the
|
|
(final) output key class. This allows the map output key class to be
|
|
different than the final output key class.
|
|
|
|
@return the map output key class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapOutputKeyClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the key class for the map output data. This allows the user to
|
|
specify the map output key class to be different than the final output
|
|
key class.
|
|
|
|
@param theClass the map output key class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the value class for the map output data. If it is not set, use the
|
|
(final) output value class. This allows the map output value class to be
|
|
different than the final output value class.
|
|
|
|
@return the map output value class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapOutputValueClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the value class for the map output data. This allows the user to
|
|
specify the map output value class to be different than the final output
|
|
value class.
|
|
|
|
@param theClass the map output value class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the key class for the job output data.
|
|
|
|
@return the key class for the job output data.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputKeyClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the key class for the job output data.
|
|
|
|
@param theClass the key class for the job output data.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputKeyComparator" return="org.apache.hadoop.io.RawComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link RawComparator} comparator used to compare keys.
|
|
|
|
@return the {@link RawComparator} comparator used to compare keys.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputKeyComparatorClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link RawComparator} comparator used to compare keys.
|
|
|
|
@param theClass the {@link RawComparator} comparator used to
|
|
compare keys.
|
|
@see #setOutputValueGroupingComparator(Class)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setKeyFieldComparatorOptions"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="keySpec" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link KeyFieldBasedComparator} options used to compare keys.
|
|
|
|
@param keySpec the key specification of the form -k pos1[,pos2], where,
|
|
pos is of the form f[.c][opts], where f is the number
|
|
of the key field to use, and c is the number of the first character from
|
|
the beginning of the field. Fields and character positions are numbered
|
|
starting with 1; a character position of zero in pos2 indicates the
|
|
field's last character. If '.c' is omitted from pos1, it defaults to 1
|
|
(the beginning of the field); if omitted from pos2, it defaults to 0
|
|
(the end of the field). opts are ordering options. The supported options
|
|
are:
|
|
-n, (Sort numerically)
|
|
-r, (Reverse the result of comparison)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getKeyFieldComparatorOption" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link KeyFieldBasedComparator} options]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setKeyFieldPartitionerOptions"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="keySpec" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link KeyFieldBasedPartitioner} options used for
|
|
{@link Partitioner}
|
|
|
|
@param keySpec the key specification of the form -k pos1[,pos2], where,
|
|
pos is of the form f[.c][opts], where f is the number
|
|
of the key field to use, and c is the number of the first character from
|
|
the beginning of the field. Fields and character positions are numbered
|
|
starting with 1; a character position of zero in pos2 indicates the
|
|
field's last character. If '.c' is omitted from pos1, it defaults to 1
|
|
(the beginning of the field); if omitted from pos2, it defaults to 0
|
|
(the end of the field).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getKeyFieldPartitionerOption" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link KeyFieldBasedPartitioner} options]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputValueGroupingComparator" return="org.apache.hadoop.io.RawComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user defined {@link RawComparator} comparator for
|
|
grouping keys of inputs to the reduce.
|
|
|
|
@return comparator set by the user for grouping values.
|
|
@see #setOutputValueGroupingComparator(Class) for details.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputValueGroupingComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the user defined {@link RawComparator} comparator for
|
|
grouping keys in the input to the reduce.
|
|
|
|
<p>This comparator should be provided if the equivalence rules for keys
|
|
for sorting the intermediates are different from those for grouping keys
|
|
before each call to
|
|
{@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p>
|
|
|
|
<p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed
|
|
in a single call to the reduce function if K1 and K2 compare as equal.</p>
|
|
|
|
<p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control
|
|
how keys are sorted, this can be used in conjunction to simulate
|
|
<i>secondary sort on values</i>.</p>
|
|
|
|
<p><i>Note</i>: This is not a guarantee of the reduce sort being
|
|
<i>stable</i> in any sense. (In any case, with the order of available
|
|
map-outputs to the reduce being non-deterministic, it wouldn't make
|
|
that much sense.)</p>
|
|
|
|
@param theClass the comparator class to be used for grouping keys.
|
|
It should implement <code>RawComparator</code>.
|
|
@see #setOutputKeyComparatorClass(Class)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUseNewMapper" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Should the framework use the new context-object code for running
|
|
the mapper?
|
|
@return true, if the new api should be used]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setUseNewMapper"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="flag" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the framework should use the new api for the mapper.
|
|
This is the default for jobs submitted with the new Job api.
|
|
@param flag true, if the new api should be used]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUseNewReducer" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Should the framework use the new context-object code for running
|
|
the reducer?
|
|
@return true, if the new api should be used]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setUseNewReducer"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="flag" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the framework should use the new api for the reducer.
|
|
This is the default for jobs submitted with the new Job api.
|
|
@param flag true, if the new api should be used]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the value class for job outputs.
|
|
|
|
@return the value class for job outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputValueClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the value class for job outputs.
|
|
|
|
@param theClass the value class for job outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapperClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link Mapper} class for the job.
|
|
|
|
@return the {@link Mapper} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapperClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Mapper} class for the job.
|
|
|
|
@param theClass the {@link Mapper} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapRunnerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link MapRunnable} class for the job.
|
|
|
|
@return the {@link MapRunnable} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapRunnerClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Expert: Set the {@link MapRunnable} class for the job.
|
|
|
|
Typically used to exert greater control on {@link Mapper}s.
|
|
|
|
@param theClass the {@link MapRunnable} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPartitionerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link Partitioner} used to partition {@link Mapper}-outputs
|
|
to be sent to the {@link Reducer}s.
|
|
|
|
@return the {@link Partitioner} used to partition map-outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setPartitionerClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Partitioner} class used to partition
|
|
{@link Mapper}-outputs to be sent to the {@link Reducer}s.
|
|
|
|
@param theClass the {@link Partitioner} used to partition map-outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReducerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link Reducer} class for the job.
|
|
|
|
@return the {@link Reducer} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setReducerClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Reducer} class for the job.
|
|
|
|
@param theClass the {@link Reducer} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user-defined <i>combiner</i> class used to combine map-outputs
|
|
before being sent to the reducers. Typically the combiner is the same as
|
|
the {@link Reducer} for the job i.e. {@link #getReducerClass()}.
|
|
|
|
@return the user-defined combiner class used to combine map-outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setCombinerClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the user-defined <i>combiner</i> class used to combine map-outputs
|
|
before being sent to the reducers.
|
|
|
|
<p>The combiner is an application-specified aggregation operation, which
|
|
can help cut down the amount of data transferred between the
|
|
{@link Mapper} and the {@link Reducer}, leading to better performance.</p>
|
|
|
|
<p>The framework may invoke the combiner 0, 1, or multiple times, in both
|
|
the mapper and reducer tasks. In general, the combiner is called as the
|
|
sort/merge result is written to disk. The combiner must:
|
|
<ul>
|
|
<li> be side-effect free</li>
|
|
<li> have the same input and output key types and the same input and
|
|
output value types</li>
|
|
</ul></p>
|
|
|
|
<p>Typically the combiner is the same as the <code>Reducer</code> for the
|
|
job i.e. {@link #setReducerClass(Class)}.</p>
|
|
|
|
@param theClass the user-defined combiner class used to combine
|
|
map-outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSpeculativeExecution" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Should speculative execution be used for this job?
|
|
Defaults to <code>true</code>.
|
|
|
|
@return <code>true</code> if speculative execution should be used for this job,
|
|
<code>false</code> otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSpeculativeExecution"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="speculativeExecution" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Turn speculative execution on or off for this job.
|
|
|
|
@param speculativeExecution <code>true</code> if speculative execution
|
|
should be turned on, else <code>false</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapSpeculativeExecution" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Should speculative execution be used for this job for map tasks?
|
|
Defaults to <code>true</code>.
|
|
|
|
@return <code>true</code> if speculative execution should be
|
|
used for this job for map tasks,
|
|
<code>false</code> otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapSpeculativeExecution"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="speculativeExecution" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Turn speculative execution on or off for this job for map tasks.
|
|
|
|
@param speculativeExecution <code>true</code> if speculative execution
|
|
should be turned on for map tasks,
|
|
else <code>false</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReduceSpeculativeExecution" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Should speculative execution be used for this job for reduce tasks?
|
|
Defaults to <code>true</code>.
|
|
|
|
@return <code>true</code> if speculative execution should be used
|
|
for reduce tasks for this job,
|
|
<code>false</code> otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setReduceSpeculativeExecution"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="speculativeExecution" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Turn speculative execution on or off for this job for reduce tasks.
|
|
|
|
@param speculativeExecution <code>true</code> if speculative execution
|
|
should be turned on for reduce tasks,
|
|
else <code>false</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumMapTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the configured number of map tasks for this job.
|
|
Defaults to <code>1</code>.
|
|
|
|
@return the number of map tasks for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setNumMapTasks"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="n" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the number of map tasks for this job.
|
|
|
|
<p><i>Note</i>: This is only a <i>hint</i> to the framework. The actual
|
|
number of spawned map tasks depends on the number of {@link InputSplit}s
|
|
generated by the job's {@link InputFormat#getSplits(JobConf, int)}.
|
|
|
|
A custom {@link InputFormat} is typically used to accurately control
|
|
the number of map tasks for the job.</p>
|
|
|
|
<h4 id="NoOfMaps">How many maps?</h4>
|
|
|
|
<p>The number of maps is usually driven by the total size of the inputs
|
|
i.e. total number of blocks of the input files.</p>
|
|
|
|
<p>The right level of parallelism for maps seems to be around 10-100 maps
|
|
per-node, although it has been set up to 300 or so for very cpu-light map
|
|
tasks. Task setup takes a while, so it is best if the maps take at least a
|
|
minute to execute.</p>
|
|
|
|
<p>The default behavior of file-based {@link InputFormat}s is to split the
|
|
input into <i>logical</i> {@link InputSplit}s based on the total size, in
|
|
bytes, of input files. However, the {@link FileSystem} blocksize of the
|
|
input files is treated as an upper bound for input splits. A lower bound
|
|
on the split size can be set via
|
|
<a href="{@docRoot}/../mapred-default.html#mapred.min.split.size">
|
|
mapred.min.split.size</a>.</p>
|
|
|
|
<p>Thus, if you expect 10TB of input data and have a blocksize of 128MB,
|
|
you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is
|
|
used to set it even higher.</p>
|
|
|
|
@param n the number of map tasks for this job.
|
|
@see InputFormat#getSplits(JobConf, int)
|
|
@see FileInputFormat
|
|
@see FileSystem#getDefaultBlockSize()
|
|
@see FileStatus#getBlockSize()]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumReduceTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the configured number of reduce tasks for this job. Defaults to
|
|
<code>1</code>.
|
|
|
|
@return the number of reduce tasks for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setNumReduceTasks"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="n" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the requisite number of reduce tasks for this job.
|
|
|
|
<h4 id="NoOfReduces">How many reduces?</h4>
|
|
|
|
<p>The right number of reduces seems to be <code>0.95</code> or
|
|
<code>1.75</code> multiplied by (<<i>no. of nodes</i>> *
|
|
<a href="{@docRoot}/../mapred-default.html#mapred.tasktracker.reduce.tasks.maximum">
|
|
mapred.tasktracker.reduce.tasks.maximum</a>).
|
|
</p>
|
|
|
|
<p>With <code>0.95</code> all of the reduces can launch immediately and
|
|
start transferring map outputs as the maps finish. With <code>1.75</code>
|
|
the faster nodes will finish their first round of reduces and launch a
|
|
second wave of reduces doing a much better job of load balancing.</p>
|
|
|
|
<p>Increasing the number of reduces increases the framework overhead, but
|
|
increases load balancing and lowers the cost of failures.</p>
|
|
|
|
<p>The scaling factors above are slightly less than whole numbers to
|
|
reserve a few reduce slots in the framework for speculative-tasks, failures
|
|
etc.</p>
|
|
|
|
<h4 id="ReducerNone">Reducer NONE</h4>
|
|
|
|
<p>It is legal to set the number of reduce-tasks to <code>zero</code>.</p>
|
|
|
|
<p>In this case the outputs of the map-tasks go directly to the distributed
|
|
file-system, to the path set by
|
|
{@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the
|
|
framework doesn't sort the map-outputs before writing them out to HDFS.</p>
|
|
|
|
@param n the number of reduce tasks for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxMapAttempts" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the configured number of maximum attempts that will be made to run a
|
|
map task, as specified by the <code>mapred.map.max.attempts</code>
|
|
property. If this property is not already set, the default is 4 attempts.
|
|
|
|
@return the max number of attempts per map task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxMapAttempts"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="n" type="int"/>
|
|
<doc>
|
|
<![CDATA[Expert: Set the number of maximum attempts that will be made to run a
|
|
map task.
|
|
|
|
@param n the number of attempts per map task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxReduceAttempts" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the configured number of maximum attempts that will be made to run a
|
|
reduce task, as specified by the <code>mapred.reduce.max.attempts</code>
|
|
property. If this property is not already set, the default is 4 attempts.
|
|
|
|
@return the max number of attempts per reduce task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxReduceAttempts"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="n" type="int"/>
|
|
<doc>
|
|
<![CDATA[Expert: Set the number of maximum attempts that will be made to run a
|
|
reduce task.
|
|
|
|
@param n the number of attempts per reduce task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user-specified job name. This is only used to identify the
|
|
job to the user.
|
|
|
|
@return the job's name, defaulting to "".]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the user-specified job name.
|
|
|
|
@param name the job's new name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSessionId" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user-specified session identifier. The default is the empty string.
|
|
|
|
The session identifier is used to tag metric data that is reported to some
|
|
performance metrics system via the org.apache.hadoop.metrics API. The
|
|
session identifier is intended, in particular, for use by Hadoop-On-Demand
|
|
(HOD) which allocates a virtual Hadoop cluster dynamically and transiently.
|
|
HOD will set the session identifier by modifying the mapred-site.xml file
|
|
before starting the cluster.
|
|
|
|
When not running under HOD, this identifier is expected to remain set to
|
|
the empty string.
|
|
|
|
@return the session identifier, defaulting to "".]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSessionId"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="sessionId" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the user-specified session identifier.
|
|
|
|
@param sessionId the new session id.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxTaskFailuresPerTracker"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="noFailures" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the maximum no. of failures of a given job per tasktracker.
|
|
If the no. of task failures exceeds <code>noFailures</code>, the
|
|
tasktracker is <i>blacklisted</i> for this job.
|
|
|
|
@param noFailures maximum no. of failures of a given job per tasktracker.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxTaskFailuresPerTracker" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Expert: Get the maximum no. of failures of a given job per tasktracker.
|
|
If the no. of task failures exceeds this, the tasktracker is
|
|
<i>blacklisted</i> for this job.
|
|
|
|
@return the maximum no. of failures of a given job per tasktracker.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxMapTaskFailuresPercent" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the maximum percentage of map tasks that can fail without
|
|
the job being aborted.
|
|
|
|
Each map task is executed a minimum of {@link #getMaxMapAttempts()}
|
|
attempts before being declared as <i>failed</i>.
|
|
|
|
Defaults to <code>zero</code>, i.e. <i>any</i> failed map-task results in
|
|
the job being declared as {@link JobStatus#FAILED}.
|
|
|
|
@return the maximum percentage of map tasks that can fail without
|
|
the job being aborted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxMapTaskFailuresPercent"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="percent" type="int"/>
|
|
<doc>
|
|
<![CDATA[Expert: Set the maximum percentage of map tasks that can fail without the
|
|
job being aborted.
|
|
|
|
Each map task is executed a minimum of {@link #getMaxMapAttempts} attempts
|
|
before being declared as <i>failed</i>.
|
|
|
|
@param percent the maximum percentage of map tasks that can fail without
|
|
the job being aborted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxReduceTaskFailuresPercent" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the maximum percentage of reduce tasks that can fail without
|
|
the job being aborted.
|
|
|
|
Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()}
|
|
attempts before being declared as <i>failed</i>.
|
|
|
|
Defaults to <code>zero</code>, i.e. <i>any</i> failed reduce-task results
|
|
in the job being declared as {@link JobStatus#FAILED}.
|
|
|
|
@return the maximum percentage of reduce tasks that can fail without
|
|
the job being aborted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxReduceTaskFailuresPercent"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="percent" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the maximum percentage of reduce tasks that can fail without the job
|
|
being aborted.
|
|
|
|
Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()}
|
|
attempts before being declared as <i>failed</i>.
|
|
|
|
@param percent the maximum percentage of reduce tasks that can fail without
|
|
the job being aborted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobPriority"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="prio" type="org.apache.hadoop.mapred.JobPriority"/>
|
|
<doc>
|
|
<![CDATA[Set {@link JobPriority} for this job.
|
|
|
|
@param prio the {@link JobPriority} for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobPriority" return="org.apache.hadoop.mapred.JobPriority"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link JobPriority} for this job.
|
|
|
|
@return the {@link JobPriority} for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProfileEnabled" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get whether the task profiling is enabled.
|
|
@return true if some tasks will be profiled]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setProfileEnabled"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="newValue" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the system should collect profiler information for some of
|
|
the tasks in this job. The information is stored in the user log
|
|
directory.
|
|
@param newValue true means it should be gathered]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProfileParams" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the profiler configuration arguments.
|
|
|
|
The default value for this property is
|
|
"-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s"
|
|
|
|
@return the parameters to pass to the task child to configure profiling]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setProfileParams"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="value" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the profiler configuration arguments. If the string contains a '%s' it
|
|
will be replaced with the name of the profiling output file when the task
|
|
runs.
|
|
|
|
This value is passed to the task child JVM on the command line.
|
|
|
|
@param value the configuration string]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="isMap" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Get the range of maps or reduces to profile.
|
|
@param isMap is the task a map?
|
|
@return the task ranges]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setProfileTaskRange"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="isMap" type="boolean"/>
|
|
<param name="newValue" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the ranges of maps or reduces to profile. setProfileEnabled(true)
|
|
must also be called.
|
|
@param newValue a set of integer ranges of the map ids]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapDebugScript"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="mDbgScript" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the debug script to run when the map tasks fail.
|
|
|
|
<p>The debug script can aid debugging of failed map tasks. The script is
|
|
given task's stdout, stderr, syslog, jobconf files as arguments.</p>
|
|
|
|
<p>The debug command, run on the node where the map failed, is:</p>
|
|
<p><pre><blockquote>
|
|
$script $stdout $stderr $syslog $jobconf.
|
|
</blockquote></pre></p>
|
|
|
|
<p> The script file is distributed through {@link DistributedCache}
|
|
APIs. The script needs to be symlinked. </p>
|
|
|
|
<p>Here is an example on how to submit a script:</p>
|
|
<p><blockquote><pre>
|
|
job.setMapDebugScript("./myscript");
|
|
DistributedCache.createSymlink(job);
|
|
DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
|
|
</pre></blockquote></p>
|
|
|
|
@param mDbgScript the script name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapDebugScript" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the map task's debug script.
|
|
|
|
@return the debug Script for the mapred job for failed map tasks.
|
|
@see #setMapDebugScript(String)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setReduceDebugScript"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="rDbgScript" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the debug script to run when the reduce tasks fail.
|
|
|
|
<p>The debug script can aid debugging of failed reduce tasks. The script
|
|
is given task's stdout, stderr, syslog, jobconf files as arguments.</p>
|
|
|
|
<p>The debug command, run on the node where the reduce failed, is:</p>
|
|
<p><pre><blockquote>
|
|
$script $stdout $stderr $syslog $jobconf.
|
|
</blockquote></pre></p>
|
|
|
|
<p> The script file is distributed through {@link DistributedCache}
|
|
APIs. The script file needs to be symlinked. </p>
|
|
|
|
<p>Here is an example on how to submit a script:</p>
|
|
<p><blockquote><pre>
|
|
job.setReduceDebugScript("./myscript");
|
|
DistributedCache.createSymlink(job);
|
|
DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
|
|
</pre></blockquote></p>
|
|
|
|
@param rDbgScript the script name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReduceDebugScript" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the reduce task's debug Script
|
|
|
|
@return the debug script for the mapred job for failed reduce tasks.
|
|
@see #setReduceDebugScript(String)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobEndNotificationURI" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the uri to be invoked in-order to send a notification after the job
|
|
has completed (success/failure).
|
|
|
|
@return the job end notification uri, <code>null</code> if it hasn't
|
|
been set.
|
|
@see #setJobEndNotificationURI(String)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobEndNotificationURI"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="uri" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the uri to be invoked in-order to send a notification after the job
|
|
has completed (success/failure).
|
|
|
|
<p>The uri can contain 2 special parameters: <tt>$jobId</tt> and
|
|
<tt>$jobStatus</tt>. Those, if present, are replaced by the job's
|
|
identifier and completion-status respectively.</p>
|
|
|
|
<p>This is typically used by application-writers to implement chaining of
|
|
Map-Reduce jobs in an <i>asynchronous manner</i>.</p>
|
|
|
|
@param uri the job end notification uri
|
|
@see JobStatus
|
|
@see <a href="{@docRoot}/org/apache/hadoop/mapred/JobClient.html#JobCompletionAndChaining">Job Completion and Chaining</a>]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobLocalDir" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get job-specific shared directory for use as scratch space
|
|
|
|
<p>
|
|
When a job starts, a shared directory is created at location
|
|
<code>
|
|
${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ </code>.
|
|
This directory is exposed to the users through
|
|
<code>job.local.dir </code>.
|
|
So, the tasks can use this space
|
|
as scratch space and share files among them. </p>
|
|
This value is available as System property also.
|
|
|
|
@return The localized job specific shared directory]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxVirtualMemoryForTask" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The maximum amount of memory any task of this job will use. See
|
|
{@link #MAPRED_TASK_MAXVMEM_PROPERTY}
|
|
|
|
@return The maximum amount of memory any task of this job will use, in
|
|
bytes.
|
|
@see #setMaxVirtualMemoryForTask(long)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxVirtualMemoryForTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="vmem" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the maximum amount of memory any task of this job can use. See
|
|
{@link #MAPRED_TASK_MAXVMEM_PROPERTY}
|
|
|
|
@param vmem Maximum amount of virtual memory in bytes any task of this job
|
|
can use.
|
|
@see #getMaxVirtualMemoryForTask()]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxPhysicalMemoryForTask" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The maximum amount of physical memory any task of this job will use. See
|
|
{@link #MAPRED_TASK_MAXPMEM_PROPERTY}
|
|
|
|
@return The maximum amount of physical memory any task of this job will
|
|
use, in bytes.
|
|
@see #setMaxPhysicalMemoryForTask(long)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxPhysicalMemoryForTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="pmem" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the maximum amount of physical memory any task of this job can use. See
|
|
{@link #MAPRED_TASK_MAXPMEM_PROPERTY}
|
|
|
|
@param pmem Maximum amount of physical memory in bytes any task of this job
|
|
can use.
|
|
@see #getMaxPhysicalMemoryForTask()]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getQueueName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the name of the queue to which this job is submitted.
|
|
Defaults to 'default'.
|
|
|
|
@return name of the queue]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setQueueName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="queueName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the name of the queue to which this job should be submitted.
|
|
|
|
@param queueName Name of the queue]]>
|
|
</doc>
|
|
</method>
|
|
<field name="DISABLED_MEMORY_LIMIT" type="long"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[A value which if set for memory related configuration options,
|
|
indicates that the options are turned off.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="DEFAULT_QUEUE_NAME" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Name of the queue to which jobs will be submitted, if no queue
|
|
name is mentioned.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Cluster-wide configuration to be set by the administrators that provides
|
|
default amount of maximum virtual memory for job's tasks. This has to be
|
|
set on both the JobTracker node for the sake of scheduling decisions and on
|
|
the TaskTracker nodes for the sake of memory management.
|
|
|
|
<p>
|
|
|
|
If a job doesn't specify its virtual memory requirement by setting
|
|
{@link #MAPRED_TASK_MAXVMEM_PROPERTY} to {@link #DISABLED_MEMORY_LIMIT},
|
|
tasks are assured a memory limit set to this property. This property is
|
|
disabled by default, and if not explicitly set to a valid value by the
|
|
administrators and if a job doesn't specify its virtual memory
|
|
requirements, the job's tasks will not be assured anything and may be
|
|
killed by a TT that intends to control the total memory usage of the tasks
|
|
via memory management functionality.
|
|
|
|
<p>
|
|
|
|
This value should in general be less than the cluster-wide configuration
|
|
{@link #UPPER_LIMIT_ON_TASK_VMEM_PROPERTY}. If not, or if it is not set,
|
|
TaskTracker's memory management may be disabled and a scheduler's memory
|
|
based scheduling decisions will be affected. Please refer to the
|
|
documentation of the configured scheduler to see how this property is used.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_TASK_MAXVMEM_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The maximum amount of memory any task of this job will use.
|
|
|
|
<p>
|
|
|
|
This value will be used by TaskTrackers for monitoring the memory usage of
|
|
tasks of this job. If a TaskTracker's memory management functionality is
|
|
enabled, each task of this job will be allowed to use a maximum virtual
|
|
memory specified by this property. If the task's memory usage goes over
|
|
this value, the task will be failed by the TT. If not set, the cluster-wide
|
|
configuration {@link #MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY} is used as the
|
|
default value for memory requirements. If this property cascaded with
|
|
{@link #MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY} becomes equal to -1, job's
|
|
tasks will not be assured anything and may be killed by a TT that intends
|
|
to control the total memory usage of the tasks via memory management
|
|
functionality. If the memory management functionality is disabled on a TT,
|
|
this value is ignored.
|
|
|
|
<p>
|
|
|
|
This value should also be not more than the cluster-wide configuration
|
|
{@link #UPPER_LIMIT_ON_TASK_VMEM_PROPERTY} which has to be set by the site
|
|
administrators.
|
|
|
|
<p>
|
|
|
|
This value may be used by schedulers that support scheduling based on job's
|
|
memory requirements. In general, a task of this job will be scheduled on a
|
|
TaskTracker only if the amount of virtual memory still unoccupied on the
|
|
TaskTracker is greater than or equal to this value. But different
|
|
schedulers can take different decisions. Please refer to the documentation
|
|
of the scheduler being configured to see if it does memory based scheduling
|
|
and if it does, how this property is used by that scheduler.
|
|
|
|
@see #setMaxVirtualMemoryForTask(long)
|
|
@see #getMaxVirtualMemoryForTask()]]>
|
|
</doc>
|
|
</field>
|
|
<field name="MAPRED_TASK_MAXPMEM_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The maximum amount of physical memory any task of a job will use.
|
|
|
|
<p>
|
|
|
|
This value may be used by schedulers that support scheduling based on job's
|
|
memory requirements. In general, a task of this job will be scheduled on a
|
|
TaskTracker, only if the amount of physical memory still unoccupied on the
|
|
TaskTracker is greater than or equal to this value. But different
|
|
schedulers can take different decisions. Please refer to the documentation
|
|
of the scheduler being configured to see how it does memory based
|
|
scheduling and how this variable is used by that scheduler.
|
|
|
|
@see #setMaxPhysicalMemoryForTask(long)
|
|
@see #getMaxPhysicalMemoryForTask()]]>
|
|
</doc>
|
|
</field>
|
|
<field name="UPPER_LIMIT_ON_TASK_VMEM_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Cluster-wide configuration to be set by the site administrators that
|
|
provides an upper limit on the maximum virtual memory that can be specified
|
|
by a job. The job configuration {@link #MAPRED_TASK_MAXVMEM_PROPERTY} and
|
|
the cluster-wide configuration
|
|
{@link #MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY} should, by definition, be
|
|
less than this value. If the job configuration
|
|
{@link #MAPRED_TASK_MAXVMEM_PROPERTY} is more than this value,
|
|
depending on the scheduler being configured, the job may be rejected or the
|
|
job configuration may just be ignored.
|
|
|
|
<p>
|
|
|
|
If it is not set on a TaskTracker, TaskTracker's memory management will be
|
|
disabled.]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A map/reduce job configuration.
|
|
|
|
<p><code>JobConf</code> is the primary interface for a user to describe a
|
|
map-reduce job to the Hadoop framework for execution. The framework tries to
|
|
faithfully execute the job as-is described by <code>JobConf</code>, however:
|
|
<ol>
|
|
<li>
|
|
Some configuration parameters might have been marked as
|
|
<a href="{@docRoot}/org/apache/hadoop/conf/Configuration.html#FinalParams">
|
|
final</a> by administrators and hence cannot be altered.
|
|
</li>
|
|
<li>
|
|
While some job parameters are straight-forward to set
|
|
(e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly
|
|
with the rest of the framework and/or job-configuration and are relatively more
|
|
complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}).
|
|
</li>
|
|
</ol></p>
|
|
|
|
<p><code>JobConf</code> typically specifies the {@link Mapper}, combiner
|
|
(if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and
|
|
{@link OutputFormat} implementations to be used etc.</p>
|
|
|
|
<p>Optionally <code>JobConf</code> is used to specify other advanced facets
|
|
of the job such as <code>Comparator</code>s to be used, files to be put in
|
|
the {@link DistributedCache}, whether or not intermediate and/or job outputs
|
|
are to be compressed (and how), debuggability via user-provided scripts
|
|
( {@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}),
|
|
for doing post-processing on task logs, task's stdout, stderr, syslog,
|
|
etc.</p>
|
|
|
|
<p>Here is an example on how to configure a job via <code>JobConf</code>:</p>
|
|
<p><blockquote><pre>
|
|
// Create a new JobConf
|
|
JobConf job = new JobConf(new Configuration(), MyJob.class);
|
|
|
|
// Specify various job-specific parameters
|
|
job.setJobName("myjob");
|
|
|
|
FileInputFormat.setInputPaths(job, new Path("in"));
|
|
FileOutputFormat.setOutputPath(job, new Path("out"));
|
|
|
|
job.setMapperClass(MyJob.MyMapper.class);
|
|
job.setCombinerClass(MyJob.MyReducer.class);
|
|
job.setReducerClass(MyJob.MyReducer.class);
|
|
|
|
job.setInputFormat(SequenceFileInputFormat.class);
|
|
job.setOutputFormat(SequenceFileOutputFormat.class);
|
|
</pre></blockquote></p>
|
|
|
|
@see JobClient
|
|
@see ClusterStatus
|
|
@see Tool
|
|
@see DistributedCache
|
|
@deprecated Use {@link Configuration} instead]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobConf -->
|
|
<!-- start interface org.apache.hadoop.mapred.JobConfigurable -->
|
|
<interface name="JobConfigurable" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Initializes a new instance from a {@link JobConf}.
|
|
|
|
@param job the configuration]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[That which may be configured.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.JobConfigurable -->
|
|
<!-- start class org.apache.hadoop.mapred.JobContext -->
|
|
<class name="JobContext" extends="org.apache.hadoop.mapreduce.JobContext"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.JobContext} instead.">
|
|
<method name="getJobConf" return="org.apache.hadoop.mapred.JobConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the job Configuration
|
|
|
|
@return JobConf]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgressible" return="org.apache.hadoop.util.Progressable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the progress mechanism for reporting progress.
|
|
|
|
@return progress mechanism]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[@deprecated Use {@link org.apache.hadoop.mapreduce.JobContext} instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobContext -->
|
|
<!-- start class org.apache.hadoop.mapred.JobEndNotifier -->
|
|
<class name="JobEndNotifier" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobEndNotifier"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="startNotifier"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="stopNotifier"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="registerNotification"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="status" type="org.apache.hadoop.mapred.JobStatus"/>
|
|
</method>
|
|
<method name="localRunnerNotification"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="status" type="org.apache.hadoop.mapred.JobStatus"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobEndNotifier -->
|
|
<!-- start class org.apache.hadoop.mapred.JobHistory -->
|
|
<class name="JobHistory" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobHistory"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="init" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="hostname" type="java.lang.String"/>
|
|
<param name="jobTrackerStartTime" type="long"/>
|
|
<doc>
|
|
<![CDATA[Initialize JobHistory files.
|
|
@param conf Jobconf of the job tracker.
|
|
@param hostname jobtracker's hostname
|
|
@param jobTrackerStartTime jobtracker's start time
|
|
@return true if initialized properly
|
|
false otherwise]]>
|
|
</doc>
|
|
</method>
|
|
<method name="parseHistoryFromFS"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="path" type="java.lang.String"/>
|
|
<param name="l" type="org.apache.hadoop.mapred.JobHistory.Listener"/>
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Parses history file and invokes Listener.handle() for
|
|
each line of history. It can be used for looking through history
|
|
files for specific items without having to keep whole history in memory.
|
|
@param path path to history file
|
|
@param l Listener for history events
|
|
@param fs FileSystem where history file is present
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isDisableHistory" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns history disable status. By default history is enabled so this
|
|
method returns false.
|
|
@return true if history logging is disabled, false otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setDisableHistory"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="disableHistory" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Enable/disable history logging. Default value is false, so history
|
|
is enabled by default.
|
|
@param disableHistory true if history should be disabled, false otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskLogsUrl" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="attempt" type="org.apache.hadoop.mapred.JobHistory.TaskAttempt"/>
|
|
<doc>
|
|
<![CDATA[Return the TaskLogsUrl of a particular TaskAttempt
|
|
|
|
@param attempt
|
|
@return the taskLogsUrl. null if http-port or tracker-name or
|
|
task-attempt-id are unavailable.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="JOB_NAME_TRIM_LENGTH" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Provides methods for writing to and reading from job history.
|
|
Job History works in an append mode, JobHistory and its inner classes provide methods
|
|
to log job events.
|
|
|
|
JobHistory is split into multiple files, format of each file is plain text where each line
|
|
is of the format [type (key=value)*], where type identifies the type of the record.
|
|
Type maps to UID of one of the inner classes of this class.
|
|
|
|
Job history is maintained in a master index which contains start/stop times of all jobs with
|
|
a few other job level properties. Apart from this each job's history is maintained in a separate history
|
|
file. Names of job history files follow the format jobtrackerId_jobid
|
|
|
|
For parsing the job history it supports a listener based interface where each line is parsed
|
|
and passed to listener. The listener can create an object model of history or look for specific
|
|
events and discard rest of the history.
|
|
|
|
CHANGE LOG :
|
|
Version 0 : The history has the following format :
|
|
TAG KEY1="VALUE1" KEY2="VALUE2" and so on.
|
|
TAG can be Job, Task, MapAttempt or ReduceAttempt.
|
|
Note that a '"' is the line delimiter.
|
|
Version 1 : Changes the line delimiter to '.'
|
|
Values are now escaped for unambiguous parsing.
|
|
Added the Meta tag to store version info.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobHistory -->
|
|
<!-- start class org.apache.hadoop.mapred.JobHistory.HistoryCleaner -->
|
|
<class name="JobHistory.HistoryCleaner" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.lang.Runnable"/>
|
|
<constructor name="JobHistory.HistoryCleaner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Cleans up history data.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Delete history files older than one month. Update master index and remove all
|
|
jobs older than one month. Also if a job tracker has no jobs in last one month
|
|
remove reference to the job tracker.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobHistory.HistoryCleaner -->
|
|
<!-- start class org.apache.hadoop.mapred.JobHistory.JobInfo -->
|
|
<class name="JobHistory.JobInfo" extends="org.apache.hadoop.mapred.JobHistory.KeyValuePair"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobHistory.JobInfo" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create new JobInfo]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getAllTasks" return="java.util.Map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns all map and reduce tasks <taskid-Task>.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocalJobFilePath" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<doc>
|
|
<![CDATA[Get the path of the locally stored job file
|
|
@param jobId id of the job
|
|
@return the path of the job file on the local file system]]>
|
|
</doc>
|
|
</method>
|
|
<method name="encodeJobHistoryFilePath" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="logFile" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Helper function to encode the URL of the path of the job-history
|
|
log file.
|
|
|
|
@param logFile path of the job-history file
|
|
@return URL encoded path
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="encodeJobHistoryFileName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="logFileName" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Helper function to encode the URL of the filename of the job-history
|
|
log file.
|
|
|
|
@param logFileName file name of the job-history file
|
|
@return URL encoded filename
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="decodeJobHistoryFileName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="logFileName" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Helper function to decode the URL of the filename of the job-history
|
|
log file.
|
|
|
|
@param logFileName file name of the job-history file
|
|
@return URL decoded filename
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUserName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the user name from the job conf]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobHistoryLogLocation" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="logFileName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Get the job history file path given the history filename]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobHistoryLogLocationForUser" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="logFileName" type="java.lang.String"/>
|
|
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the user job history file path]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobHistoryFileName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="id" type="org.apache.hadoop.mapred.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Recover the job history filename from the history folder.
|
|
Uses the following pattern
|
|
$jt-hostname_[0-9]*_$job-id_$user-$job-name*
|
|
@param jobConf the job conf
|
|
@param id job id]]>
|
|
</doc>
|
|
</method>
|
|
<method name="recoverJobHistoryFile" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="logFilePath" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Selects one of the two files generated as a part of recovery.
|
|
The rule of thumb is to always select the oldest file.
|
|
This call makes sure that only one file is left in the end.
|
|
@param conf job conf
|
|
@param logFilePath Path of the log file
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logSubmitted"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="jobConfPath" type="java.lang.String"/>
|
|
<param name="submitTime" type="long"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Log job submitted event to history. Creates a new file in history
|
|
for the job. If history file creation fails, it disables history
|
|
for all other events.
|
|
@param jobId job id assigned by job tracker.
|
|
@param jobConf job conf of the job
|
|
@param jobConfPath path to job conf xml file in HDFS.
|
|
@param submitTime time when job tracker received the job
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logInited"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<param name="startTime" type="long"/>
|
|
<param name="totalMaps" type="int"/>
|
|
<param name="totalReduces" type="int"/>
|
|
<doc>
|
|
<![CDATA[Logs launch time of job.
|
|
|
|
@param jobId job id, assigned by jobtracker.
|
|
@param startTime start time of job.
|
|
@param totalMaps total maps assigned by jobtracker.
|
|
@param totalReduces total reduces.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logStarted"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="Use {@link #logInited(JobID, long, int, int)} and
|
|
{@link #logStarted(JobID)}">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<param name="startTime" type="long"/>
|
|
<param name="totalMaps" type="int"/>
|
|
<param name="totalReduces" type="int"/>
|
|
<doc>
|
|
<![CDATA[Logs the job as RUNNING.
|
|
|
|
@param jobId job id, assigned by jobtracker.
|
|
@param startTime start time of job.
|
|
@param totalMaps total maps assigned by jobtracker.
|
|
@param totalReduces total reduces.
|
|
@deprecated Use {@link #logInited(JobID, long, int, int)} and
|
|
{@link #logStarted(JobID)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logStarted"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<doc>
|
|
<![CDATA[Logs job as running
|
|
@param jobId job id, assigned by jobtracker.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logFinished"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<param name="finishTime" type="long"/>
|
|
<param name="finishedMaps" type="int"/>
|
|
<param name="finishedReduces" type="int"/>
|
|
<param name="failedMaps" type="int"/>
|
|
<param name="failedReduces" type="int"/>
|
|
<param name="counters" type="org.apache.hadoop.mapred.Counters"/>
|
|
<doc>
|
|
<![CDATA[Log job finished. Closes the job file in history.
|
|
@param jobId job id, assigned by jobtracker.
|
|
@param finishTime finish time of job in ms.
|
|
@param finishedMaps no of maps successfully finished.
|
|
@param finishedReduces no of reduces finished successfully.
|
|
@param failedMaps no of failed map tasks.
|
|
@param failedReduces no of failed reduce tasks.
|
|
@param counters the counters from the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logFailed"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
<param name="timestamp" type="long"/>
|
|
<param name="finishedMaps" type="int"/>
|
|
<param name="finishedReduces" type="int"/>
|
|
<doc>
|
|
<![CDATA[Logs job failed event. Closes the job history log file.
|
|
@param jobid job id
|
|
@param timestamp time when job failure was detected in ms.
|
|
@param finishedMaps no of finished map tasks.
|
|
@param finishedReduces no of finished reduce tasks.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logKilled"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
<param name="timestamp" type="long"/>
|
|
<param name="finishedMaps" type="int"/>
|
|
<param name="finishedReduces" type="int"/>
|
|
<doc>
|
|
<![CDATA[Logs job killed event. Closes the job history log file.
|
|
|
|
@param jobid
|
|
job id
|
|
@param timestamp
|
|
time when the job kill was issued, in ms.
|
|
@param finishedMaps
|
|
no of finished map tasks.
|
|
@param finishedReduces
|
|
no of finished reduce tasks.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logJobPriority"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
<param name="priority" type="org.apache.hadoop.mapred.JobPriority"/>
|
|
<doc>
|
|
<![CDATA[Log job's priority.
|
|
@param jobid job id
|
|
@param priority Job's priority]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logJobInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="Use {@link #logJobInfo(JobID, long, long)} instead.">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
<param name="submitTime" type="long"/>
|
|
<param name="launchTime" type="long"/>
|
|
<param name="restartCount" type="int"/>
|
|
<doc>
|
|
<![CDATA[Log job's submit-time/launch-time
|
|
@param jobid job id
|
|
@param submitTime job's submit time
|
|
@param launchTime job's launch time
|
|
@param restartCount number of times the job got restarted
|
|
@deprecated Use {@link #logJobInfo(JobID, long, long)} instead.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logJobInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
<param name="submitTime" type="long"/>
|
|
<param name="launchTime" type="long"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Helper class for logging or reading back events related to job start, finish or failure.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobHistory.JobInfo -->
|
|
<!-- start class org.apache.hadoop.mapred.JobHistory.Keys -->
|
|
<class name="JobHistory.Keys" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapred.JobHistory.Keys[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<field name="JOBTRACKERID" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="START_TIME" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FINISH_TIME" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="JOBID" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="JOBNAME" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="USER" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="JOBCONF" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="SUBMIT_TIME" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="LAUNCH_TIME" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="TOTAL_MAPS" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="TOTAL_REDUCES" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FAILED_MAPS" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FAILED_REDUCES" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FINISHED_MAPS" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FINISHED_REDUCES" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="JOB_STATUS" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="TASKID" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="HOSTNAME" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="TASK_TYPE" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="ERROR" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="TASK_ATTEMPT_ID" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="TASK_STATUS" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="COPY_PHASE" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="SORT_PHASE" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="REDUCE_PHASE" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="SHUFFLE_FINISHED" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="SORT_FINISHED" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="COUNTERS" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="SPLITS" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="JOB_PRIORITY" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="HTTP_PORT" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="TRACKER_NAME" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="STATE_STRING" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="VERSION" type="org.apache.hadoop.mapred.JobHistory.Keys"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Job history files contain key="value" pairs, where keys belong to this enum.
|
|
It acts as a global namespace for all keys.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobHistory.Keys -->
|
|
<!-- start interface org.apache.hadoop.mapred.JobHistory.Listener -->
|
|
<interface name="JobHistory.Listener" abstract="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="handle"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="recType" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"/>
|
|
<param name="values" type="java.util.Map"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Callback method for history parser.
|
|
@param recType type of record, which is the first entry in the line.
|
|
@param values a map of key-value pairs as they appear in history.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Callback interface for reading back log events from JobHistory. This interface
|
|
should be implemented and passed to JobHistory.parseHistoryFromFS()]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.JobHistory.Listener -->
|
|
<!-- start class org.apache.hadoop.mapred.JobHistory.MapAttempt -->
|
|
<class name="JobHistory.MapAttempt" extends="org.apache.hadoop.mapred.JobHistory.TaskAttempt"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobHistory.MapAttempt"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="logStarted"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link #logStarted(TaskAttemptID, long, String, int, String)}">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="startTime" type="long"/>
|
|
<param name="hostName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log start time of this map task attempt.
|
|
@param taskAttemptId task attempt id
|
|
@param startTime start time of task attempt as reported by task tracker.
|
|
@param hostName host name of the task attempt.
|
|
@deprecated Use
|
|
{@link #logStarted(TaskAttemptID, long, String, int, String)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logStarted"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="startTime" type="long"/>
|
|
<param name="trackerName" type="java.lang.String"/>
|
|
<param name="httpPort" type="int"/>
|
|
<param name="taskType" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log start time of this map task attempt.
|
|
|
|
@param taskAttemptId task attempt id
|
|
@param startTime start time of task attempt as reported by task tracker.
|
|
@param trackerName name of the tracker executing the task attempt.
|
|
@param httpPort http port of the task tracker executing the task attempt
|
|
@param taskType Whether the attempt is cleanup or setup or map]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logFinished"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link #logFinished(TaskAttemptID, long, String, String, String, Counters)}">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="finishTime" type="long"/>
|
|
<param name="hostName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log finish time of map task attempt.
|
|
@param taskAttemptId task attempt id
|
|
@param finishTime finish time
|
|
@param hostName host name
|
|
@deprecated Use
|
|
{@link #logFinished(TaskAttemptID, long, String, String, String, Counters)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logFinished"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="finishTime" type="long"/>
|
|
<param name="hostName" type="java.lang.String"/>
|
|
<param name="taskType" type="java.lang.String"/>
|
|
<param name="stateString" type="java.lang.String"/>
|
|
<param name="counter" type="org.apache.hadoop.mapred.Counters"/>
|
|
<doc>
|
|
<![CDATA[Log finish time of map task attempt.
|
|
|
|
@param taskAttemptId task attempt id
|
|
@param finishTime finish time
|
|
@param hostName host name
|
|
@param taskType Whether the attempt is cleanup or setup or map
|
|
@param stateString state string of the task attempt
|
|
@param counter counters of the task attempt]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logFailed"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link #logFailed(TaskAttemptID, long, String, String, String)}">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="timestamp" type="long"/>
|
|
<param name="hostName" type="java.lang.String"/>
|
|
<param name="error" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log task attempt failed event.
|
|
@param taskAttemptId task attempt id
|
|
@param timestamp timestamp
|
|
@param hostName hostname of this task attempt.
|
|
@param error error message if any for this task attempt.
|
|
@deprecated Use
|
|
{@link #logFailed(TaskAttemptID, long, String, String, String)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logFailed"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="timestamp" type="long"/>
|
|
<param name="hostName" type="java.lang.String"/>
|
|
<param name="error" type="java.lang.String"/>
|
|
<param name="taskType" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log task attempt failed event.
|
|
|
|
@param taskAttemptId task attempt id
|
|
@param timestamp timestamp
|
|
@param hostName hostname of this task attempt.
|
|
@param error error message if any for this task attempt.
|
|
@param taskType Whether the attempt is cleanup or setup or map]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logKilled"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link #logKilled(TaskAttemptID, long, String, String, String)}">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="timestamp" type="long"/>
|
|
<param name="hostName" type="java.lang.String"/>
|
|
<param name="error" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log task attempt killed event.
|
|
@param taskAttemptId task attempt id
|
|
@param timestamp timestamp
|
|
@param hostName hostname of this task attempt.
|
|
@param error error message if any for this task attempt.
|
|
@deprecated Use
|
|
{@link #logKilled(TaskAttemptID, long, String, String, String)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logKilled"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="timestamp" type="long"/>
|
|
<param name="hostName" type="java.lang.String"/>
|
|
<param name="error" type="java.lang.String"/>
|
|
<param name="taskType" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log task attempt killed event.
|
|
|
|
@param taskAttemptId task attempt id
|
|
@param timestamp timestamp
|
|
@param hostName hostname of this task attempt.
|
|
@param error error message if any for this task attempt.
|
|
@param taskType Whether the attempt is cleanup or setup or map]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Helper class for logging or reading back events related to start, finish or failure of
|
|
a Map Attempt on a node.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobHistory.MapAttempt -->
|
|
<!-- start class org.apache.hadoop.mapred.JobHistory.RecordTypes -->
|
|
<class name="JobHistory.RecordTypes" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapred.JobHistory.RecordTypes[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapred.JobHistory.RecordTypes"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<field name="Jobtracker" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="Job" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="Task" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="MapAttempt" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="ReduceAttempt" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="Meta" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Record types are identifiers for each line of log in history files.
|
|
A record type appears as the first token in a single line of log.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobHistory.RecordTypes -->
|
|
<!-- start class org.apache.hadoop.mapred.JobHistory.ReduceAttempt -->
|
|
<class name="JobHistory.ReduceAttempt" extends="org.apache.hadoop.mapred.JobHistory.TaskAttempt"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobHistory.ReduceAttempt"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="logStarted"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link #logStarted(TaskAttemptID, long, String, int, String)}">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="startTime" type="long"/>
|
|
<param name="hostName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log start time of Reduce task attempt.
|
|
@param taskAttemptId task attempt id
|
|
@param startTime start time
|
|
@param hostName host name
|
|
@deprecated Use
|
|
{@link #logStarted(TaskAttemptID, long, String, int, String)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logStarted"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="startTime" type="long"/>
|
|
<param name="trackerName" type="java.lang.String"/>
|
|
<param name="httpPort" type="int"/>
|
|
<param name="taskType" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log start time of Reduce task attempt.
|
|
|
|
@param taskAttemptId task attempt id
|
|
@param startTime start time
|
|
@param trackerName tracker name
|
|
@param httpPort the http port of the tracker executing the task attempt
|
|
@param taskType Whether the attempt is cleanup or setup or reduce]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logFinished"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link #logFinished(TaskAttemptID, long, long, long, String, String, String, Counters)}">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="shuffleFinished" type="long"/>
|
|
<param name="sortFinished" type="long"/>
|
|
<param name="finishTime" type="long"/>
|
|
<param name="hostName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log finished event of this task.
|
|
@param taskAttemptId task attempt id
|
|
@param shuffleFinished shuffle finish time
|
|
@param sortFinished sort finish time
|
|
@param finishTime finish time of task
|
|
@param hostName host name where task attempt executed
|
|
@deprecated Use
|
|
{@link #logFinished(TaskAttemptID, long, long, long, String, String, String, Counters)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logFinished"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="shuffleFinished" type="long"/>
|
|
<param name="sortFinished" type="long"/>
|
|
<param name="finishTime" type="long"/>
|
|
<param name="hostName" type="java.lang.String"/>
|
|
<param name="taskType" type="java.lang.String"/>
|
|
<param name="stateString" type="java.lang.String"/>
|
|
<param name="counter" type="org.apache.hadoop.mapred.Counters"/>
|
|
<doc>
|
|
<![CDATA[Log finished event of this task.
|
|
|
|
@param taskAttemptId task attempt id
|
|
@param shuffleFinished shuffle finish time
|
|
@param sortFinished sort finish time
|
|
@param finishTime finish time of task
|
|
@param hostName host name where task attempt executed
|
|
@param taskType Whether the attempt is cleanup or setup or reduce
|
|
@param stateString the state string of the attempt
|
|
@param counter counters of the attempt]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logFailed"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link #logFailed(TaskAttemptID, long, String, String, String)}">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="timestamp" type="long"/>
|
|
<param name="hostName" type="java.lang.String"/>
|
|
<param name="error" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log failed reduce task attempt.
|
|
@param taskAttemptId task attempt id
|
|
@param timestamp time stamp when task failed
|
|
@param hostName host name of the task attempt.
|
|
@param error error message of the task.
|
|
@deprecated Use
|
|
{@link #logFailed(TaskAttemptID, long, String, String, String)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logFailed"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="timestamp" type="long"/>
|
|
<param name="hostName" type="java.lang.String"/>
|
|
<param name="error" type="java.lang.String"/>
|
|
<param name="taskType" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log failed reduce task attempt.
|
|
|
|
@param taskAttemptId task attempt id
|
|
@param timestamp time stamp when task failed
|
|
@param hostName host name of the task attempt.
|
|
@param error error message of the task.
|
|
@param taskType Whether the attempt is cleanup or setup or reduce]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logKilled"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link #logKilled(TaskAttemptID, long, String, String, String)}">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="timestamp" type="long"/>
|
|
<param name="hostName" type="java.lang.String"/>
|
|
<param name="error" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log killed reduce task attempt.
|
|
@param taskAttemptId task attempt id
|
|
@param timestamp time stamp when task was killed
|
|
@param hostName host name of the task attempt.
|
|
@param error error message of the task.
|
|
@deprecated Use
|
|
{@link #logKilled(TaskAttemptID, long, String, String, String)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logKilled"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="timestamp" type="long"/>
|
|
<param name="hostName" type="java.lang.String"/>
|
|
<param name="error" type="java.lang.String"/>
|
|
<param name="taskType" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log killed reduce task attempt.
|
|
|
|
@param taskAttemptId task attempt id
|
|
@param timestamp time stamp when task was killed
|
|
@param hostName host name of the task attempt.
|
|
@param error error message of the task.
|
|
@param taskType Whether the attempt is cleanup or setup or reduce]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Helper class for logging or reading back events related to start, finish or failure of
|
|
a Reduce Attempt on a node.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobHistory.ReduceAttempt -->
|
|
<!-- start class org.apache.hadoop.mapred.JobHistory.Task -->
|
|
<class name="JobHistory.Task" extends="org.apache.hadoop.mapred.JobHistory.KeyValuePair"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobHistory.Task"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="logStarted"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapred.TaskID"/>
|
|
<param name="taskType" type="java.lang.String"/>
|
|
<param name="startTime" type="long"/>
|
|
<param name="splitLocations" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log start time of task (TIP).
|
|
@param taskId task id
|
|
@param taskType MAP or REDUCE
|
|
@param startTime startTime of tip.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logFinished"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapred.TaskID"/>
|
|
<param name="taskType" type="java.lang.String"/>
|
|
<param name="finishTime" type="long"/>
|
|
<param name="counters" type="org.apache.hadoop.mapred.Counters"/>
|
|
<doc>
|
|
<![CDATA[Log finish time of task.
|
|
@param taskId task id
|
|
@param taskType MAP or REDUCE
|
|
@param finishTime finish time of task in ms]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logUpdates"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapred.TaskID"/>
|
|
<param name="finishTime" type="long"/>
|
|
<doc>
|
|
<![CDATA[Update the finish time of task.
|
|
@param taskId task id
|
|
@param finishTime finish time of task in ms]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logFailed"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapred.TaskID"/>
|
|
<param name="taskType" type="java.lang.String"/>
|
|
<param name="time" type="long"/>
|
|
<param name="error" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Log task failed event.
|
|
@param taskId task id
|
|
@param taskType MAP or REDUCE.
|
|
@param time timestamp when the task failure was detected.
|
|
@param error error message for failure.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="logFailed"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapred.TaskID"/>
|
|
<param name="taskType" type="java.lang.String"/>
|
|
<param name="time" type="long"/>
|
|
<param name="error" type="java.lang.String"/>
|
|
<param name="failedDueToAttempt" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<doc>
|
|
<![CDATA[@param failedDueToAttempt The attempt that caused the failure, if any]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskAttempts" return="java.util.Map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns all task attempts for this task. <task attempt id - TaskAttempt>]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Helper class for logging or reading back events related to Task's start, finish or failure.
|
|
All events logged by this class are logged in a separate file per job in
|
|
job tracker history. These events map to TIPs in jobtracker.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobHistory.Task -->
|
|
<!-- start class org.apache.hadoop.mapred.JobHistory.TaskAttempt -->
|
|
<class name="JobHistory.TaskAttempt" extends="org.apache.hadoop.mapred.JobHistory.Task"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobHistory.TaskAttempt"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[Base class for Map and Reduce TaskAttempts.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobHistory.TaskAttempt -->
|
|
<!-- start class org.apache.hadoop.mapred.JobHistory.Values -->
|
|
<class name="JobHistory.Values" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapred.JobHistory.Values[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapred.JobHistory.Values"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<field name="SUCCESS" type="org.apache.hadoop.mapred.JobHistory.Values"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FAILED" type="org.apache.hadoop.mapred.JobHistory.Values"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="KILLED" type="org.apache.hadoop.mapred.JobHistory.Values"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="MAP" type="org.apache.hadoop.mapred.JobHistory.Values"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="REDUCE" type="org.apache.hadoop.mapred.JobHistory.Values"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="CLEANUP" type="org.apache.hadoop.mapred.JobHistory.Values"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="RUNNING" type="org.apache.hadoop.mapred.JobHistory.Values"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="PREP" type="org.apache.hadoop.mapred.JobHistory.Values"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="SETUP" type="org.apache.hadoop.mapred.JobHistory.Values"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This enum contains some of the values commonly used by history log events.
|
|
Since values in history can only be strings, Values.name() is used in
|
|
most places in history file.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobHistory.Values -->
|
|
<!-- start class org.apache.hadoop.mapred.JobID -->
|
|
<class name="JobID" extends="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobID" type="java.lang.String, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a JobID object
|
|
@param jtIdentifier jobTracker identifier
|
|
@param id job number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobID"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="downgrade" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="old" type="org.apache.hadoop.mapreduce.JobID"/>
|
|
<doc>
|
|
<![CDATA[Downgrade a new JobID to an old one
|
|
@param old a new or old JobID
|
|
@return either old or a new JobID built to match old]]>
|
|
</doc>
|
|
</method>
|
|
<method name="read" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="forName" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="str" type="java.lang.String"/>
|
|
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
|
|
<doc>
|
|
<![CDATA[Construct a JobId object from given string
|
|
@return constructed JobId object or null if the given String is null
|
|
@throws IllegalArgumentException if the given string is malformed]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobIDsPattern" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jtIdentifier" type="java.lang.String"/>
|
|
<param name="jobId" type="java.lang.Integer"/>
|
|
<doc>
|
|
<![CDATA[Returns a regex pattern which matches job IDs. Arguments can
|
|
be given null, in which case that part of the regex will be generic.
|
|
For example to obtain a regex matching <i>any job</i>
|
|
run on the jobtracker started at <i>200707121733</i>, we would use :
|
|
<pre>
|
|
JobID.getJobIDsPattern("200707121733", null);
|
|
</pre>
|
|
which will return :
|
|
<pre> "job_200707121733_[0-9]*" </pre>
|
|
@param jtIdentifier jobTracker identifier, or null
|
|
@param jobId job number, or null
|
|
@return a regex pattern matching JobIDs]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[JobID represents the immutable and unique identifier for
|
|
the job. JobID consists of two parts. First part
|
|
represents the jobtracker identifier, so that jobID to jobtracker map
|
|
is defined. For cluster setup this string is the jobtracker
|
|
start time, for local setting, it is "local".
|
|
Second part of the JobID is the job number. <br>
|
|
An example JobID is :
|
|
<code>job_200707121733_0003</code> , which represents the third job
|
|
running at the jobtracker started at <code>200707121733</code>.
|
|
<p>
|
|
Applications should never construct or parse JobID strings, but rather
|
|
use appropriate constructors or {@link #forName(String)} method.
|
|
|
|
@see TaskID
|
|
@see TaskAttemptID]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobID -->
|
|
<!-- start class org.apache.hadoop.mapred.JobPriority -->
|
|
<class name="JobPriority" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapred.JobPriority[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapred.JobPriority"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<field name="VERY_HIGH" type="org.apache.hadoop.mapred.JobPriority"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="HIGH" type="org.apache.hadoop.mapred.JobPriority"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="NORMAL" type="org.apache.hadoop.mapred.JobPriority"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="LOW" type="org.apache.hadoop.mapred.JobPriority"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="VERY_LOW" type="org.apache.hadoop.mapred.JobPriority"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Used to describe the priority of the running job.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobPriority -->
|
|
<!-- start class org.apache.hadoop.mapred.JobProfile -->
|
|
<class name="JobProfile" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="JobProfile"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct an empty {@link JobProfile}.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobProfile" type="java.lang.String, org.apache.hadoop.mapreduce.JobID, java.lang.String, java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a {@link JobProfile} with the userid, jobid,
|
|
job config-file, job-details url and job name.
|
|
|
|
@param user userid of the person who submitted the job.
|
|
@param jobid id of the job.
|
|
@param jobFile job configuration file.
|
|
@param url link to the web-ui for details of the job.
|
|
@param name user-specified job name.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobProfile" type="java.lang.String, org.apache.hadoop.mapreduce.JobID, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a {@link JobProfile} with the userid, jobid,
|
|
job config-file, job-details url, job name and queue name.
|
|
|
|
@param user userid of the person who submitted the job.
|
|
@param jobid id of the job.
|
|
@param jobFile job configuration file.
|
|
@param url link to the web-ui for details of the job.
|
|
@param name user-specified job name.
|
|
@param queueName name of the queue to which the job is submitted]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobProfile" type="java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="use JobProfile(String, JobID, String, String, String) instead">
|
|
<doc>
|
|
<![CDATA[@deprecated use JobProfile(String, JobID, String, String, String) instead]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getUser" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user id.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobID" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the job id.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobId" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="use getJobID() instead">
|
|
<doc>
|
|
<![CDATA[@deprecated use getJobID() instead]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobFile" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the configuration file for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getURL" return="java.net.URL"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the link to the web-ui for details of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user-specified job name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getQueueName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the name of the queue to which the job is submitted.
|
|
@return name of the queue.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A JobProfile is a MapReduce primitive. Tracks a job,
|
|
whether living or dead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobProfile -->
|
|
<!-- start class org.apache.hadoop.mapred.JobQueueInfo -->
|
|
<class name="JobQueueInfo" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="JobQueueInfo"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Default constructor for Job Queue Info.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobQueueInfo" type="java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a new JobQueueInfo object using the queue name and the
|
|
scheduling information passed.
|
|
|
|
@param queueName Name of the job queue
|
|
@param schedulingInfo Scheduling Information associated with the job
|
|
queue]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="setQueueName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="queueName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the queue name of the JobQueueInfo
|
|
|
|
@param queueName Name of the job queue.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getQueueName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the queue name from JobQueueInfo
|
|
|
|
@return queue name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSchedulingInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="schedulingInfo" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the scheduling information associated with a particular job queue.
|
|
|
|
@param schedulingInfo Scheduling Information associated with the job queue]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSchedulingInfo" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Gets the scheduling information associated with a particular job queue.
|
|
If nothing is set, returns <b>"N/A"</b>.
|
|
|
|
@return Scheduling information associated with a particular Job Queue]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Class that contains the information regarding the Job Queues which are
|
|
maintained by the Hadoop Map/Reduce framework.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobQueueInfo -->
|
|
<!-- start class org.apache.hadoop.mapred.JobStatus -->
|
|
<class name="JobStatus" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<implements name="java.lang.Cloneable"/>
|
|
<constructor name="JobStatus"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a job status object for a given jobid.
|
|
@param jobid The jobid of the job
|
|
@param mapProgress The progress made on the maps
|
|
@param reduceProgress The progress made on the reduces
|
|
@param cleanupProgress The progress made on cleanup
|
|
@param runState The current state of the job]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a job status object for a given jobid.
|
|
@param jobid The jobid of the job
|
|
@param mapProgress The progress made on the maps
|
|
@param reduceProgress The progress made on the reduces
|
|
@param runState The current state of the job]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int, org.apache.hadoop.mapred.JobPriority"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a job status object for a given jobid.
|
|
@param jobid The jobid of the job
|
|
@param mapProgress The progress made on the maps
|
|
@param reduceProgress The progress made on the reduces
|
|
@param cleanupProgress The progress made on cleanup
|
|
@param runState The current state of the job
|
|
@param jp Priority of the job.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a job status object for a given jobid.
|
|
@param jobid The jobid of the job
|
|
@param setupProgress The progress made on the setup
|
|
@param mapProgress The progress made on the maps
|
|
@param reduceProgress The progress made on the reduces
|
|
@param cleanupProgress The progress made on the cleanup
|
|
@param runState The current state of the job
|
|
@param jp Priority of the job.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getJobId" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="use getJobID instead">
|
|
<doc>
|
|
<![CDATA[@deprecated use getJobID instead]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobID" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return The jobid of the Job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="mapProgress" return="float"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Percentage of progress in maps]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanupProgress" return="float"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Percentage of progress in cleanup]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupProgress" return="float"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Percentage of progress in setup]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reduceProgress" return="float"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return Percentage of progress in reduce]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRunState" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return running state of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setRunState"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="state" type="int"/>
|
|
<doc>
|
|
<![CDATA[Change the current run state of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getStartTime" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return start time of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="clone" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getUsername" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the username of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSchedulingInfo" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Gets the scheduling information associated with a particular Job.
|
|
@return the scheduling information of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSchedulingInfo"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="schedulingInfo" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Used to set the scheduling information associated with a particular Job.
|
|
|
|
@param schedulingInfo Scheduling information of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobPriority" return="org.apache.hadoop.mapred.JobPriority"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the priority of the job
|
|
@return job priority]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobPriority"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jp" type="org.apache.hadoop.mapred.JobPriority"/>
|
|
<doc>
|
|
<![CDATA[Set the priority of the job, defaulting to NORMAL.
|
|
@param jp new job priority]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isJobComplete" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns true if the status is for a completed job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="RUNNING" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="SUCCEEDED" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FAILED" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="PREP" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="KILLED" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Describes the current status of a job. This is
|
|
not intended to be a comprehensive piece of data.
|
|
For that, look at JobProfile.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobStatus -->
|
|
<!-- start class org.apache.hadoop.mapred.JobTracker -->
|
|
<class name="JobTracker" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.MRConstants"/>
|
|
<implements name="org.apache.hadoop.mapred.InterTrackerProtocol"/>
|
|
<implements name="org.apache.hadoop.mapred.JobSubmissionProtocol"/>
|
|
<implements name="org.apache.hadoop.mapred.TaskTrackerManager"/>
|
|
<implements name="org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol"/>
|
|
<method name="startTracker" return="org.apache.hadoop.mapred.JobTracker"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Start the JobTracker with given configuration.
|
|
|
|
The conf will be modified to reflect the actual ports on which
|
|
the JobTracker is up and running if the user passes the port as
|
|
<code>zero</code>.
|
|
|
|
@param conf configuration for the JobTracker.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="stopTracker"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getProtocolVersion" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="protocol" type="java.lang.String"/>
|
|
<param name="clientVersion" type="long"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="hasRestarted" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Whether the JT has restarted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hasRecovered" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Whether the JT has recovered upon restart]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecoveryDuration" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[How long the jobtracker took to recover from restart.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInstrumentationClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="setInstrumentationClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="t" type="java.lang.Class"/>
|
|
</method>
|
|
<method name="getAddress" return="java.net.InetSocketAddress"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="offerService"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Run forever]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTotalSubmissions" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobTrackerMachine" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getTrackerIdentifier" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the unique identifier (i.e. timestamp) of this job tracker start.
|
|
@return a string with a unique identifier]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTrackerPort" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getInfoPort" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getStartTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="runningJobs" return="java.util.Vector"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getRunningJobs" return="java.util.List"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Version that is called from a timer thread, and therefore needs to be
|
|
careful to synchronize.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="failedJobs" return="java.util.Vector"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="completedJobs" return="java.util.Vector"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="taskTrackers" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get all the task trackers in the cluster
|
|
|
|
@return {@link Collection} of {@link TaskTrackerStatus}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="activeTaskTrackers" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the active task tracker statuses in the cluster
|
|
|
|
@return {@link Collection} of active {@link TaskTrackerStatus}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="taskTrackerNames" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the active and blacklisted task tracker names in the cluster. The first
|
|
element in the returned list contains the list of active tracker names.
|
|
The second element in the returned list contains the list of blacklisted
|
|
tracker names.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="blacklistedTaskTrackers" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the blacklisted task tracker statuses in the cluster
|
|
|
|
@return {@link Collection} of blacklisted {@link TaskTrackerStatus}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isBlacklisted" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="trackerID" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Whether the tracker is blacklisted or not
|
|
|
|
@param trackerID
|
|
|
|
@return true if blacklisted, false otherwise]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskTracker" return="org.apache.hadoop.mapred.TaskTrackerStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="trackerID" type="java.lang.String"/>
|
|
</method>
|
|
<method name="resolveAndAddToTopology" return="org.apache.hadoop.net.Node"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getNodesAtMaxLevel" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns a collection of nodes at the max level]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getParentNode" return="org.apache.hadoop.net.Node"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="node" type="org.apache.hadoop.net.Node"/>
|
|
<param name="level" type="int"/>
|
|
</method>
|
|
<method name="getNode" return="org.apache.hadoop.net.Node"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Return the Node in the network topology that corresponds to the hostname]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumTaskCacheLevels" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getNumResolvedTaskTrackers" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getNumberOfUniqueHosts" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="addJobInProgressListener"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="listener" type="org.apache.hadoop.mapred.JobInProgressListener"/>
|
|
</method>
|
|
<method name="removeJobInProgressListener"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="listener" type="org.apache.hadoop.mapred.JobInProgressListener"/>
|
|
</method>
|
|
<method name="getQueueManager" return="org.apache.hadoop.mapred.QueueManager"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the {@link QueueManager} associated with the JobTracker.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getBuildVersion" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="heartbeat" return="org.apache.hadoop.mapred.HeartbeatResponse"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="status" type="org.apache.hadoop.mapred.TaskTrackerStatus"/>
|
|
<param name="restarted" type="boolean"/>
|
|
<param name="initialContact" type="boolean"/>
|
|
<param name="acceptNewTasks" type="boolean"/>
|
|
<param name="responseId" type="short"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[The periodic heartbeat mechanism between the {@link TaskTracker} and
|
|
the {@link JobTracker}.
|
|
|
|
The {@link JobTracker} processes the status information sent by the
|
|
{@link TaskTracker} and responds with instructions to start/stop
|
|
tasks or jobs, and also 'reset' instructions during contingencies.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNextHeartbeatInterval" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Calculates next heartbeat interval using cluster size.
|
|
Heartbeat interval is incremented by 1 second for every 50 nodes.
|
|
@return next heartbeat interval.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getFilesystemName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Grab the local fs name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reportTaskTrackerError"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskTracker" type="java.lang.String"/>
|
|
<param name="errorClass" type="java.lang.String"/>
|
|
<param name="errorMessage" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getNewJobId" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Allocates a new JobID.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="submitJob" return="org.apache.hadoop.mapred.JobStatus"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[JobTracker.submitJob() kicks off a new job.
|
|
|
|
Create a 'JobInProgress' object, which contains both JobProfile
|
|
and JobStatus. Those two sub-objects are sometimes shipped outside
|
|
of the JobTracker. But JobInProgress adds info that's useful for
|
|
the JobTracker alone.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="use {@link #getClusterStatus(boolean)}">
|
|
<doc>
|
|
<![CDATA[@deprecated use {@link #getClusterStatus(boolean)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="detailed" type="boolean"/>
|
|
</method>
|
|
<method name="killJob"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setJobPriority"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
<param name="priority" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Set the priority of a job
|
|
@param jobid id of the job
|
|
@param priority new priority of the job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobProfile" return="org.apache.hadoop.mapred.JobProfile"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
</method>
|
|
<method name="getJobStatus" return="org.apache.hadoop.mapred.JobStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
</method>
|
|
<method name="getJobCounters" return="org.apache.hadoop.mapred.Counters"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
</method>
|
|
<method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
</method>
|
|
<method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
</method>
|
|
<method name="getCleanupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
</method>
|
|
<method name="getSetupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
</method>
|
|
<method name="getTaskCompletionEvents" return="org.apache.hadoop.mapred.TaskCompletionEvent[]"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
<param name="fromEventId" type="int"/>
|
|
<param name="maxEvents" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getTaskDiagnostics" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the diagnostics for a given task
|
|
@param taskId the id of the task
|
|
@return an array of the diagnostic messages]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTip" return="org.apache.hadoop.mapred.TaskInProgress"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="tipid" type="org.apache.hadoop.mapred.TaskID"/>
|
|
<doc>
|
|
<![CDATA[Returns specified TaskInProgress, or null.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="killTask" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="shouldFail" type="boolean"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Mark a Task to be killed]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getAssignedTracker" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<doc>
|
|
<![CDATA[Get tracker name for a given task id.
|
|
@param taskId the name of the task
|
|
@return The name of the task tracker]]>
|
|
</doc>
|
|
</method>
|
|
<method name="jobsToComplete" return="org.apache.hadoop.mapred.JobStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getAllJobs" return="org.apache.hadoop.mapred.JobStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getSystemDir" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@see org.apache.hadoop.mapred.JobSubmissionProtocol#getSystemDir()]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJob" return="org.apache.hadoop.mapred.JobInProgress"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
|
|
</method>
|
|
<method name="getLocalJobFilePath" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<doc>
|
|
<![CDATA[Get the localized job file path on the job tracker's local file system
|
|
@param jobId id of the job
|
|
@return the path of the job conf file on the local file system]]>
|
|
</doc>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="argv" type="java.lang.String[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Start the JobTracker process. This is used only for debugging. As a rule,
|
|
JobTracker should be run as part of the DFS Namenode process.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getQueueInfo" return="org.apache.hadoop.mapred.JobQueueInfo"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="queue" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getJobsFromQueue" return="org.apache.hadoop.mapred.JobStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="queue" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="refreshServiceAcl"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[JobTracker is the central location for submitting and
|
|
tracking MR jobs in a network environment.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobTracker -->
|
|
<!-- start class org.apache.hadoop.mapred.JobTracker.IllegalStateException -->
|
|
<class name="JobTracker.IllegalStateException" extends="java.io.IOException"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobTracker.IllegalStateException" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[A client tried to submit a job before the Job Tracker was ready.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobTracker.IllegalStateException -->
|
|
<!-- start class org.apache.hadoop.mapred.JobTracker.State -->
|
|
<class name="JobTracker.State" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapred.JobTracker.State[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapred.JobTracker.State"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<field name="INITIALIZING" type="org.apache.hadoop.mapred.JobTracker.State"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="RUNNING" type="org.apache.hadoop.mapred.JobTracker.State"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.JobTracker.State -->
|
|
<!-- start class org.apache.hadoop.mapred.KeyValueLineRecordReader -->
|
|
<class name="KeyValueLineRecordReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<constructor name="KeyValueLineRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="getKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="createKey" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="createValue" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="findSeparator" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="utf" type="byte[]"/>
|
|
<param name="start" type="int"/>
|
|
<param name="length" type="int"/>
|
|
<param name="sep" type="byte"/>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.Text"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read key/value pair in a line.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class treats a line in the input as a key/value pair separated by a
|
|
separator character. The separator can be specified in config file
|
|
under the attribute name key.value.separator.in.input.line. The default
|
|
separator is the tab character ('\t').]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.KeyValueLineRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.KeyValueTextInputFormat -->
|
|
<class name="KeyValueTextInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<constructor name="KeyValueTextInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="isSplitable" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="file" type="org.apache.hadoop.fs.Path"/>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines.
|
|
Either linefeed or carriage-return are used to signal end of line. Each line
|
|
is divided into key and value parts by a separator byte. If no such byte
|
|
exists, the key will be the entire line and value will be empty.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.KeyValueTextInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.LineRecordReader -->
|
|
<class name="LineRecordReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link org.apache.hadoop.mapreduce.lib.input.LineRecordReader} instead.">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<constructor name="LineRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<constructor name="LineRecordReader" type="java.io.InputStream, long, long, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="LineRecordReader" type="java.io.InputStream, long, long, org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="createKey" return="org.apache.hadoop.io.LongWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="createValue" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.LongWritable"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read a line.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the progress within the split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Treats keys as offset in file and value as line.
|
|
@deprecated Use
|
|
{@link org.apache.hadoop.mapreduce.lib.input.LineRecordReader} instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.LineRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.LineRecordReader.LineReader -->
|
|
<class name="LineRecordReader.LineReader" extends="org.apache.hadoop.util.LineReader"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.util.LineReader} instead.">
|
|
<constructor name="LineRecordReader.LineReader" type="java.io.InputStream, org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[A class that provides a line reader from an input stream.
|
|
@deprecated Use {@link org.apache.hadoop.util.LineReader} instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.LineRecordReader.LineReader -->
|
|
<!-- start class org.apache.hadoop.mapred.MapFileOutputFormat -->
|
|
<class name="MapFileOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MapFileOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getReaders" return="org.apache.hadoop.io.MapFile.Reader[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="dir" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Open the output generated by this format.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getEntry" return="org.apache.hadoop.io.Writable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="readers" type="org.apache.hadoop.io.MapFile.Reader[]"/>
|
|
<param name="partitioner" type="org.apache.hadoop.mapred.Partitioner"/>
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<param name="value" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get an entry from output generated by this class.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link OutputFormat} that writes {@link MapFile}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.MapFileOutputFormat -->
|
|
<!-- start interface org.apache.hadoop.mapred.Mapper -->
|
|
<interface name="Mapper" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.Mapper} instead.">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<implements name="org.apache.hadoop.io.Closeable"/>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Maps a single input key/value pair into an intermediate key/value pair.
|
|
|
|
<p>Output pairs need not be of the same types as input pairs. A given
|
|
input pair may map to zero or many output pairs. Output pairs are
|
|
collected with calls to
|
|
{@link OutputCollector#collect(Object,Object)}.</p>
|
|
|
|
<p>Applications can use the {@link Reporter} provided to report progress
|
|
or just indicate that they are alive. In scenarios where the application
|
|
takes a significant amount of time to process individual key/value
|
|
pairs, this is crucial since the framework might assume that the task has
|
|
timed-out and kill that task. The other way of avoiding this is to set
|
|
<a href="{@docRoot}/../mapred-default.html#mapred.task.timeout">
|
|
mapred.task.timeout</a> to a high-enough value (or even zero for no
|
|
time-outs).</p>
|
|
|
|
@param key the input key.
|
|
@param value the input value.
|
|
@param output collects mapped keys and values.
|
|
@param reporter facility to report progress.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Maps input key/value pairs to a set of intermediate key/value pairs.
|
|
|
|
<p>Maps are the individual tasks which transform input records into
|
|
intermediate records. The transformed intermediate records need not be of
|
|
the same type as the input records. A given input pair may map to zero or
|
|
many output pairs.</p>
|
|
|
|
<p>The Hadoop Map-Reduce framework spawns one map task for each
|
|
{@link InputSplit} generated by the {@link InputFormat} for the job.
|
|
<code>Mapper</code> implementations can access the {@link JobConf} for the
|
|
job via the {@link JobConfigurable#configure(JobConf)} and initialize
|
|
themselves. Similarly they can use the {@link Closeable#close()} method for
|
|
de-initialization.</p>
|
|
|
|
<p>The framework then calls
|
|
{@link #map(Object, Object, OutputCollector, Reporter)}
|
|
for each key/value pair in the <code>InputSplit</code> for that task.</p>
|
|
|
|
<p>All intermediate values associated with a given output key are
|
|
subsequently grouped by the framework, and passed to a {@link Reducer} to
|
|
determine the final output. Users can control the grouping by specifying
|
|
a <code>Comparator</code> via
|
|
{@link JobConf#setOutputKeyComparatorClass(Class)}.</p>
|
|
|
|
<p>The grouped <code>Mapper</code> outputs are partitioned per
|
|
<code>Reducer</code>. Users can control which keys (and hence records) go to
|
|
which <code>Reducer</code> by implementing a custom {@link Partitioner}.</p>
|
|
|
|
<p>Users can optionally specify a <code>combiner</code>, via
|
|
{@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the
|
|
intermediate outputs, which helps to cut down the amount of data transferred
|
|
from the <code>Mapper</code> to the <code>Reducer</code>.</p>
|
|
|
|
<p>The intermediate, grouped outputs are always stored in
|
|
{@link SequenceFile}s. Applications can specify if and how the intermediate
|
|
outputs are to be compressed and which {@link CompressionCodec}s are to be
|
|
used via the <code>JobConf</code>.</p>
|
|
|
|
<p>If the job has
|
|
<a href="{@docRoot}/org/apache/hadoop/mapred/JobConf.html#ReducerNone">zero
|
|
reduces</a> then the output of the <code>Mapper</code> is directly written
|
|
to the {@link FileSystem} without grouping by keys.</p>
|
|
|
|
<p>Example:</p>
|
|
<p><blockquote><pre>
|
|
public class MyMapper<K extends WritableComparable, V extends Writable>
|
|
extends MapReduceBase implements Mapper<K, V, K, V> {
|
|
|
|
static enum MyCounters { NUM_RECORDS }
|
|
|
|
private String mapTaskId;
|
|
private String inputFile;
|
|
private int noRecords = 0;
|
|
|
|
public void configure(JobConf job) {
|
|
mapTaskId = job.get("mapred.task.id");
|
|
inputFile = job.get("map.input.file");
|
|
}
|
|
|
|
public void map(K key, V val,
|
|
OutputCollector<K, V> output, Reporter reporter)
|
|
throws IOException {
|
|
// Process the <key, value> pair (assume this takes a while)
|
|
// ...
|
|
// ...
|
|
|
|
// Let the framework know that we are alive, and kicking!
|
|
// reporter.progress();
|
|
|
|
// Process some more
|
|
// ...
|
|
// ...
|
|
|
|
// Increment the no. of <key, value> pairs processed
|
|
++noRecords;
|
|
|
|
// Increment counters
|
|
reporter.incrCounter(NUM_RECORDS, 1);
|
|
|
|
// Every 100 records update application-level status
|
|
if ((noRecords%100) == 0) {
|
|
reporter.setStatus(mapTaskId + " processed " + noRecords +
|
|
" from input-file: " + inputFile);
|
|
}
|
|
|
|
// Output the result
|
|
output.collect(key, val);
|
|
}
|
|
}
|
|
</pre></blockquote></p>
|
|
|
|
<p>Applications may write a custom {@link MapRunnable} to exert greater
|
|
control on map processing e.g. multi-threaded <code>Mapper</code>s etc.</p>
|
|
|
|
@see JobConf
|
|
@see InputFormat
|
|
@see Partitioner
|
|
@see Reducer
|
|
@see MapReduceBase
|
|
@see MapRunnable
|
|
@see SequenceFile
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.Mapper -->
|
|
<!-- start class org.apache.hadoop.mapred.MapReduceBase -->
|
|
<class name="MapReduceBase" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Closeable"/>
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<constructor name="MapReduceBase"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Default implementation that does nothing.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Default implementation that does nothing.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Base class for {@link Mapper} and {@link Reducer} implementations.
|
|
|
|
<p>Provides default no-op implementations for a few methods, most non-trivial
|
|
applications need to override some of them.</p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.MapReduceBase -->
|
|
<!-- start class org.apache.hadoop.mapred.MapReducePolicyProvider -->
|
|
<class name="MapReducePolicyProvider" extends="org.apache.hadoop.security.authorize.PolicyProvider"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MapReducePolicyProvider"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getServices" return="org.apache.hadoop.security.authorize.Service[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[{@link PolicyProvider} for Map-Reduce protocols.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.MapReducePolicyProvider -->
|
|
<!-- start interface org.apache.hadoop.mapred.MapRunnable -->
|
|
<interface name="MapRunnable" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.Mapper} instead.">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="input" type="org.apache.hadoop.mapred.RecordReader"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Start mapping input <tt><key, value></tt> pairs.
|
|
|
|
<p>Mapping of input records to output records is complete when this method
|
|
returns.</p>
|
|
|
|
@param input the {@link RecordReader} to read the input records.
|
|
@param output the {@link OutputCollector} to collect the output records.
|
|
@param reporter {@link Reporter} to report progress, status-updates etc.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Expert: Generic interface for {@link Mapper}s.
|
|
|
|
<p>Custom implementations of <code>MapRunnable</code> can exert greater
|
|
control on map processing e.g. multi-threaded, asynchronous mappers etc.</p>
|
|
|
|
@see Mapper
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.MapRunnable -->
|
|
<!-- start class org.apache.hadoop.mapred.MapRunner -->
|
|
<class name="MapRunner" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.MapRunnable"/>
|
|
<constructor name="MapRunner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="input" type="org.apache.hadoop.mapred.RecordReader"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getMapper" return="org.apache.hadoop.mapred.Mapper"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Default {@link MapRunnable} implementation.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.MapRunner -->
|
|
<!-- start class org.apache.hadoop.mapred.MultiFileInputFormat -->
|
|
<class name="MultiFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} instead">
|
|
<constructor name="MultiFileInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="numSplits" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An abstract {@link InputFormat} that returns {@link MultiFileSplit}'s
|
|
in {@link #getSplits(JobConf, int)} method. Splits are constructed from
|
|
the files under the input paths. Each split returned contains <i>nearly</i>
|
|
equal content length. <br>
|
|
Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)}
|
|
to construct <code>RecordReader</code>'s for <code>MultiFileSplit</code>'s.
|
|
@see MultiFileSplit
|
|
@deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} instead]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.MultiFileInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.MultiFileSplit -->
|
|
<class name="MultiFileSplit" extends="org.apache.hadoop.mapred.lib.CombineFileSplit"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapred.lib.CombineFileSplit} instead">
|
|
<constructor name="MultiFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A sub-collection of input files. Unlike {@link FileSplit}, MultiFileSplit
|
|
class does not represent a split of a file, but a split of input files
|
|
into smaller sets. The atomic unit of split is a file. <br>
|
|
MultiFileSplit can be used to implement {@link RecordReader}'s, with
|
|
reading one record per file.
|
|
@see FileSplit
|
|
@see MultiFileInputFormat
|
|
@deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileSplit} instead]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.MultiFileSplit -->
|
|
<!-- start interface org.apache.hadoop.mapred.OutputCollector -->
|
|
<interface name="OutputCollector" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="collect"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Adds a key/value pair to the output.
|
|
|
|
@param key the key to collect.
|
|
@param value the value to collect.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Collects the <code>&lt;key, value&gt;</code> pairs output by {@link Mapper}s
|
|
and {@link Reducer}s.
|
|
|
|
<p><code>OutputCollector</code> is the generalization of the facility
|
|
provided by the Map-Reduce framework to collect data output by either the
|
|
<code>Mapper</code> or the <code>Reducer</code> i.e. intermediate outputs
|
|
or the output of the job.</p>]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.OutputCollector -->
|
|
<!-- start class org.apache.hadoop.mapred.OutputCommitter -->
|
|
<class name="OutputCommitter" extends="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.OutputCommitter} instead.">
|
|
<constructor name="OutputCommitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setupJob"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For the framework to setup the job output during initialization
|
|
|
|
@param jobContext Context of the job whose output is being written.
|
|
@throws IOException if temporary output could not be created]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanupJob"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For cleaning up the job's output after job completion
|
|
|
|
@param jobContext Context of the job whose output is being written.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupTask"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Sets up output for the task.
|
|
|
|
@param taskContext Context of the task whose output is being written.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="needsTaskCommit" return="boolean"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Check whether task needs a commit
|
|
|
|
@param taskContext
|
|
@return true/false
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="commitTask"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[To promote the task's temporary output to final output location
|
|
|
|
The task's output is moved to the job's output directory.
|
|
|
|
@param taskContext Context of the task whose output is being written.
|
|
@throws IOException if commit is not successful]]>
|
|
</doc>
|
|
</method>
|
|
<method name="abortTask"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Discard the task output
|
|
|
|
@param taskContext
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method implements the new interface by calling the old method. Note
|
|
that the input types are different between the new and old apis and this
|
|
is a bridge between the two.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanupJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method implements the new interface by calling the old method. Note
|
|
that the input types are different between the new and old apis and this
|
|
is a bridge between the two.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method implements the new interface by calling the old method. Note
|
|
that the input types are different between the new and old apis and this
|
|
is a bridge between the two.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="needsTaskCommit" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method implements the new interface by calling the old method. Note
|
|
that the input types are different between the new and old apis and this
|
|
is a bridge between the two.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="commitTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method implements the new interface by calling the old method. Note
|
|
that the input types are different between the new and old apis and this
|
|
is a bridge between the two.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="abortTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This method implements the new interface by calling the old method. Note
|
|
that the input types are different between the new and old apis and this
|
|
is a bridge between the two.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>OutputCommitter</code> describes the commit of task output for a
|
|
Map-Reduce job.
|
|
|
|
<p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of
|
|
the job to:</p>
|
|
<ol>
|
|
<li>
|
|
Setup the job during initialization. For example, create the temporary
|
|
output directory for the job during the initialization of the job.
|
|
</li>
|
|
<li>
|
|
Cleanup the job after the job completion. For example, remove the
|
|
temporary output directory after the job completion.
|
|
</li>
|
|
<li>
|
|
Setup the task temporary output.
|
|
</li>
|
|
<li>
|
|
Check whether a task needs a commit. This is to avoid the commit
|
|
procedure if a task does not need commit.
|
|
</li>
|
|
<li>
|
|
Commit of the task output.
|
|
</li>
|
|
<li>
|
|
Discard the task commit.
|
|
</li>
|
|
</ol>
|
|
|
|
@see FileOutputCommitter
|
|
@see JobContext
|
|
@see TaskAttemptContext
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.OutputCommitter} instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.OutputCommitter -->
|
|
<!-- start interface org.apache.hadoop.mapred.OutputFormat -->
|
|
<interface name="OutputFormat" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.OutputFormat} instead.">
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link RecordWriter} for the given job.
|
|
|
|
@param ignored
|
|
@param job configuration for the job whose output is being written.
|
|
@param name the unique name for this part of the output.
|
|
@param progress mechanism for reporting progress while writing to file.
|
|
@return a {@link RecordWriter} to write the output for the job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Check for validity of the output-specification for the job.
|
|
|
|
<p>This is to validate the output specification for the job when
|
|
a job is submitted. Typically checks that it does not already exist,
|
|
throwing an exception when it already exists, so that output is not
|
|
overwritten.</p>
|
|
|
|
@param ignored
|
|
@param job job configuration.
|
|
@throws IOException when output should not be attempted]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>OutputFormat</code> describes the output-specification for a
|
|
Map-Reduce job.
|
|
|
|
<p>The Map-Reduce framework relies on the <code>OutputFormat</code> of the
|
|
job to:</p>
|
|
<ol>
|
|
<li>
|
|
Validate the output-specification of the job. For e.g. check that the
|
|
output directory doesn't already exist.
|
|
<li>
|
|
Provide the {@link RecordWriter} implementation to be used to write out
|
|
the output files of the job. Output files are stored in a
|
|
{@link FileSystem}.
|
|
</li>
|
|
</ol>
|
|
|
|
@see RecordWriter
|
|
@see JobConf
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.OutputFormat} instead.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.OutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.OutputLogFilter -->
|
|
<class name="OutputLogFilter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.fs.PathFilter"/>
|
|
<constructor name="OutputLogFilter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class filters log files from the given directory.
|
|
It doesn't accept paths having _logs.
|
|
This can be used to list paths of output directory as follows:
|
|
Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir,
|
|
new OutputLogFilter()));]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.OutputLogFilter -->
|
|
<!-- start interface org.apache.hadoop.mapred.Partitioner -->
|
|
<interface name="Partitioner" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.Partitioner} instead.">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<method name="getPartition" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<param name="numPartitions" type="int"/>
|
|
<doc>
|
|
<![CDATA[Get the partition number for a given key (hence record) given the total
|
|
number of partitions i.e. number of reduce-tasks for the job.
|
|
|
|
<p>Typically a hash function on all or a subset of the key.</p>
|
|
|
|
@param key the key to be partitioned.
|
|
@param value the entry value.
|
|
@param numPartitions the total number of partitions.
|
|
@return the partition number for the <code>key</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Partitions the key space.
|
|
|
|
<p><code>Partitioner</code> controls the partitioning of the keys of the
|
|
intermediate map-outputs. The key (or a subset of the key) is used to derive
|
|
the partition, typically by a hash function. The total number of partitions
|
|
is the same as the number of reduce tasks for the job. Hence this controls
|
|
which of the <code>m</code> reduce tasks the intermediate key (and hence the
|
|
record) is sent for reduction.</p>
|
|
|
|
@see Reducer
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.Partitioner} instead.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.Partitioner -->
|
|
<!-- start interface org.apache.hadoop.mapred.RawKeyValueIterator -->
|
|
<interface name="RawKeyValueIterator" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getKey" return="org.apache.hadoop.io.DataInputBuffer"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets the current raw key.
|
|
|
|
@return Gets the current raw key as a DataInputBuffer
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getValue" return="org.apache.hadoop.io.DataInputBuffer"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets the current raw value.
|
|
|
|
@return Gets the current raw value as a DataInputBuffer
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Sets up the current key and value (for getKey and getValue).
|
|
|
|
@return <code>true</code> if there exists a key/value,
|
|
<code>false</code> otherwise.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Closes the iterator so that the underlying streams can be closed.
|
|
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="org.apache.hadoop.util.Progress"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Gets the Progress object; this has a float (0.0 - 1.0)
|
|
indicating the bytes processed by the iterator so far]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>RawKeyValueIterator</code> is an iterator used to iterate over
|
|
the raw keys and values during sort/merge of intermediate data.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.RawKeyValueIterator -->
|
|
<!-- start interface org.apache.hadoop.mapred.RecordReader -->
|
|
<interface name="RecordReader" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Reads the next key/value pair from the input for processing.
|
|
|
|
@param key the key to read data into
|
|
@param value the value to read data into
|
|
@return true iff a key/value was read, false if at EOF]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createKey" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create an object of the appropriate type to be used as a key.
|
|
|
|
@return a new key object.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValue" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create an object of the appropriate type to be used as a value.
|
|
|
|
@return a new value object.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Returns the current position in the input.
|
|
|
|
@return the current position in the input.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close this {@link InputSplit} to future operations.
|
|
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[How much of the input has the {@link RecordReader} consumed i.e.
|
|
has been processed by?
|
|
|
|
@return progress from <code>0.0</code> to <code>1.0</code>.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>RecordReader</code> reads &lt;key, value&gt; pairs from an
|
|
{@link InputSplit}.
|
|
|
|
<p><code>RecordReader</code>, typically, converts the byte-oriented view of
|
|
the input, provided by the <code>InputSplit</code>, and presents a
|
|
record-oriented view for the {@link Mapper} &amp; {@link Reducer} tasks for
|
|
processing. It thus assumes the responsibility of processing record
|
|
boundaries and presenting the tasks with keys and values.</p>
|
|
|
|
@see InputSplit
|
|
@see InputFormat]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.RecordReader -->
|
|
<!-- start interface org.apache.hadoop.mapred.RecordWriter -->
|
|
<interface name="RecordWriter" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Writes a key/value pair.
|
|
|
|
@param key the key to write.
|
|
@param value the value to write.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close this <code>RecordWriter</code> to future operations.
|
|
|
|
@param reporter facility to report progress.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>RecordWriter</code> writes the output &lt;key, value&gt; pairs
|
|
to an output file.
|
|
|
|
<p><code>RecordWriter</code> implementations write the job outputs to the
|
|
{@link FileSystem}.
|
|
|
|
@see OutputFormat]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.RecordWriter -->
|
|
<!-- start interface org.apache.hadoop.mapred.Reducer -->
|
|
<interface name="Reducer" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.Reducer} instead.">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<implements name="org.apache.hadoop.io.Closeable"/>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="values" type="java.util.Iterator"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[<i>Reduces</i> values for a given key.
|
|
|
|
<p>The framework calls this method for each
|
|
<code>&lt;key, (list of values)&gt;</code> pair in the grouped inputs.
|
|
Output values must be of the same type as input values. Input keys must
|
|
not be altered. The framework will <b>reuse</b> the key and value objects
|
|
that are passed into the reduce, therefore the application should clone
|
|
the objects they want to keep a copy of. In many cases, all values are
|
|
combined into zero or one value.
|
|
</p>
|
|
|
|
<p>Output pairs are collected with calls to
|
|
{@link OutputCollector#collect(Object,Object)}.</p>
|
|
|
|
<p>Applications can use the {@link Reporter} provided to report progress
|
|
or just indicate that they are alive. In scenarios where the application
|
|
takes a significant amount of time to process individual key/value
|
|
pairs, this is crucial since the framework might assume that the task has
|
|
timed-out and kill that task. The other way of avoiding this is to set
|
|
<a href="{@docRoot}/../mapred-default.html#mapred.task.timeout">
|
|
mapred.task.timeout</a> to a high-enough value (or even zero for no
|
|
time-outs).</p>
|
|
|
|
@param key the key.
|
|
@param values the list of values to reduce.
|
|
@param output to collect keys and combined values.
|
|
@param reporter facility to report progress.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Reduces a set of intermediate values which share a key to a smaller set of
|
|
values.
|
|
|
|
<p>The number of <code>Reducer</code>s for the job is set by the user via
|
|
{@link JobConf#setNumReduceTasks(int)}. <code>Reducer</code> implementations
|
|
can access the {@link JobConf} for the job via the
|
|
{@link JobConfigurable#configure(JobConf)} method and initialize themselves.
|
|
Similarly they can use the {@link Closeable#close()} method for
|
|
de-initialization.</p>
|
|
|
|
<p><code>Reducer</code> has 3 primary phases:</p>
|
|
<ol>
|
|
<li>
|
|
|
|
<h4 id="Shuffle">Shuffle</h4>
|
|
|
|
<p><code>Reducer</code> is input the grouped output of a {@link Mapper}.
|
|
In this phase the framework, for each <code>Reducer</code>, fetches the
|
|
relevant partition of the output of all the <code>Mapper</code>s, via HTTP.
|
|
</p>
|
|
</li>
|
|
|
|
<li>
|
|
<h4 id="Sort">Sort</h4>
|
|
|
|
<p>The framework groups <code>Reducer</code> inputs by <code>key</code>s
|
|
(since different <code>Mapper</code>s may have output the same key) in this
|
|
stage.</p>
|
|
|
|
<p>The shuffle and sort phases occur simultaneously i.e. while outputs are
|
|
being fetched they are merged.</p>
|
|
|
|
<h5 id="SecondarySort">SecondarySort</h5>
|
|
|
|
<p>If equivalence rules for keys while grouping the intermediates are
|
|
different from those for grouping keys before reduction, then one may
|
|
specify a <code>Comparator</code> via
|
|
{@link JobConf#setOutputValueGroupingComparator(Class)}. Since
|
|
{@link JobConf#setOutputKeyComparatorClass(Class)} can be used to
|
|
control how intermediate keys are grouped, these can be used in conjunction
|
|
to simulate <i>secondary sort on values</i>.</p>
|
|
|
|
|
|
For example, say that you want to find duplicate web pages and tag them
|
|
all with the url of the "best" known example. You would set up the job
|
|
like:
|
|
<ul>
|
|
<li>Map Input Key: url</li>
|
|
<li>Map Input Value: document</li>
|
|
<li>Map Output Key: document checksum, url pagerank</li>
|
|
<li>Map Output Value: url</li>
|
|
<li>Partitioner: by checksum</li>
|
|
<li>OutputKeyComparator: by checksum and then decreasing pagerank</li>
|
|
<li>OutputValueGroupingComparator: by checksum</li>
|
|
</ul>
|
|
</li>
|
|
|
|
<li>
|
|
<h4 id="Reduce">Reduce</h4>
|
|
|
|
<p>In this phase the
|
|
{@link #reduce(Object, Iterator, OutputCollector, Reporter)}
|
|
method is called for each <code>&lt;key, (list of values)&gt;</code> pair in
|
|
the grouped inputs.</p>
|
|
<p>The output of the reduce task is typically written to the
|
|
{@link FileSystem} via
|
|
{@link OutputCollector#collect(Object, Object)}.</p>
|
|
</li>
|
|
</ol>
|
|
|
|
<p>The output of the <code>Reducer</code> is <b>not re-sorted</b>.</p>
|
|
|
|
<p>Example:</p>
|
|
<p><blockquote><pre>
|
|
public class MyReducer&lt;K extends WritableComparable, V extends Writable&gt;
|
|
extends MapReduceBase implements Reducer&lt;K, V, K, V&gt; {
|
|
|
|
static enum MyCounters { NUM_RECORDS }
|
|
|
|
private String reduceTaskId;
|
|
private int noKeys = 0;
|
|
|
|
public void configure(JobConf job) {
|
|
reduceTaskId = job.get("mapred.task.id");
|
|
}
|
|
|
|
public void reduce(K key, Iterator&lt;V&gt; values,
|
|
OutputCollector&lt;K, V&gt; output,
|
|
Reporter reporter)
|
|
throws IOException {
|
|
|
|
// Process
|
|
int noValues = 0;
|
|
while (values.hasNext()) {
|
|
V value = values.next();
|
|
|
|
// Increment the no. of values for this key
|
|
++noValues;
|
|
|
|
// Process the &lt;key, value&gt; pair (assume this takes a while)
|
|
// ...
|
|
// ...
|
|
|
|
// Let the framework know that we are alive, and kicking!
|
|
if ((noValues%10) == 0) {
|
|
reporter.progress();
|
|
}
|
|
|
|
// Process some more
|
|
// ...
|
|
// ...
|
|
|
|
// Output the &lt;key, value&gt;
|
|
output.collect(key, value);
|
|
}
|
|
|
|
// Increment the no. of &lt;key, list of values&gt; pairs processed
|
|
++noKeys;
|
|
|
|
// Increment counters
|
|
reporter.incrCounter(NUM_RECORDS, 1);
|
|
|
|
// Every 100 keys update application-level status
|
|
if ((noKeys%100) == 0) {
|
|
reporter.setStatus(reduceTaskId + " processed " + noKeys);
|
|
}
|
|
}
|
|
}
|
|
</pre></blockquote></p>
|
|
|
|
@see Mapper
|
|
@see Partitioner
|
|
@see Reporter
|
|
@see MapReduceBase
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.Reducer} instead.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.Reducer -->
|
|
<!-- start interface org.apache.hadoop.mapred.Reporter -->
|
|
<interface name="Reporter" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Progressable"/>
|
|
<method name="setStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="status" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the status description for the task.
|
|
|
|
@param status brief description of the current status.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.Enum"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Counter} of the given group with the given name.
|
|
|
|
@param name counter name
|
|
@return the <code>Counter</code> of the given group/name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="group" type="java.lang.String"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Counter} of the given group with the given name.
|
|
|
|
@param group counter group
|
|
@param name counter name
|
|
@return the <code>Counter</code> of the given group/name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="incrCounter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Enum"/>
|
|
<param name="amount" type="long"/>
|
|
<doc>
|
|
<![CDATA[Increments the counter identified by the key, which can be of
|
|
any {@link Enum} type, by the specified amount.
|
|
|
|
@param key key to identify the counter to be incremented. The key can be
|
|
any <code>Enum</code>.
|
|
@param amount A non-negative amount by which the counter is to
|
|
be incremented.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="incrCounter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="group" type="java.lang.String"/>
|
|
<param name="counter" type="java.lang.String"/>
|
|
<param name="amount" type="long"/>
|
|
<doc>
|
|
<![CDATA[Increments the counter identified by the group and counter name
|
|
by the specified amount.
|
|
|
|
@param group name to identify the group of the counter to be incremented.
|
|
@param counter name to identify the counter within the group.
|
|
@param amount A non-negative amount by which the counter is to
|
|
be incremented.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputSplit" return="org.apache.hadoop.mapred.InputSplit"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="UnsupportedOperationException" type="java.lang.UnsupportedOperationException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link InputSplit} object for a map.
|
|
|
|
@return the <code>InputSplit</code> that the map is reading from.
|
|
@throws UnsupportedOperationException if called outside a mapper]]>
|
|
</doc>
|
|
</method>
|
|
<field name="NULL" type="org.apache.hadoop.mapred.Reporter"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[A constant of Reporter type that does nothing.]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A facility for Map-Reduce applications to report progress and update
|
|
counters, status information etc.
|
|
|
|
<p>{@link Mapper} and {@link Reducer} can use the <code>Reporter</code>
|
|
provided to report progress or just indicate that they are alive. In
|
|
scenarios where the application takes a significant amount of time to
|
|
process individual key/value pairs, this is crucial since the framework
|
|
might assume that the task has timed-out and kill that task.
|
|
|
|
<p>Applications can also update {@link Counters} via the provided
|
|
<code>Reporter</code> .</p>
|
|
|
|
@see Progressable
|
|
@see Counters]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.Reporter -->
|
|
<!-- start interface org.apache.hadoop.mapred.RunningJob -->
|
|
<interface name="RunningJob" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getID" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the job identifier.
|
|
|
|
@return the job identifier.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobID" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="This method is deprecated and will be removed. Applications should
|
|
rather use {@link #getID()}.">
|
|
<doc>
|
|
<![CDATA[@deprecated This method is deprecated and will be removed. Applications should
|
|
rather use {@link #getID()}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the name of the job.
|
|
|
|
@return the name of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobFile" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the path of the submitted job configuration.
|
|
|
|
@return the path of the submitted job configuration.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTrackingURL" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the URL where some job progress information will be displayed.
|
|
|
|
@return the URL where some job progress information will be displayed.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="mapProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the <i>progress</i> of the job's map-tasks, as a float between 0.0
|
|
and 1.0. When all map tasks have completed, the function returns 1.0.
|
|
|
|
@return the progress of the job's map-tasks.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reduceProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the <i>progress</i> of the job's reduce-tasks, as a float between 0.0
|
|
and 1.0. When all reduce tasks have completed, the function returns 1.0.
|
|
|
|
@return the progress of the job's reduce-tasks.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanupProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the <i>progress</i> of the job's cleanup-tasks, as a float between 0.0
|
|
and 1.0. When all cleanup tasks have completed, the function returns 1.0.
|
|
|
|
@return the progress of the job's cleanup-tasks.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the <i>progress</i> of the job's setup-tasks, as a float between 0.0
|
|
and 1.0. When all setup tasks have completed, the function returns 1.0.
|
|
|
|
@return the progress of the job's setup-tasks.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isComplete" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Check if the job is finished or not.
|
|
This is a non-blocking call.
|
|
|
|
@return <code>true</code> if the job is complete, else <code>false</code>.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isSuccessful" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Check if the job completed successfully.
|
|
|
|
@return <code>true</code> if the job succeeded, else <code>false</code>.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="waitForCompletion"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Blocks until the job is complete.
|
|
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobState" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Returns the current state of the Job.
|
|
{@link JobStatus}
|
|
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="killJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Kill the running job. Blocks until all job tasks have been
|
|
killed as well. If the job is no longer running, it simply returns.
|
|
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobPriority"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="priority" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Set the priority of a running job.
|
|
@param priority the new priority for the job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskCompletionEvents" return="org.apache.hadoop.mapred.TaskCompletionEvent[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="startFrom" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get events indicating completion (success/failure) of component tasks.
|
|
|
|
@param startFrom index to start fetching events from
|
|
@return an array of {@link TaskCompletionEvent}s
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="killTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="shouldFail" type="boolean"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Kill indicated task attempt.
|
|
|
|
@param taskId the id of the task to be terminated.
|
|
@param shouldFail if true the task is failed and added to failed tasks
|
|
list, otherwise it is just killed, w/o affecting
|
|
job failure status.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="killTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Applications should rather use {@link #killTask(TaskAttemptID, boolean)}">
|
|
<param name="taskId" type="java.lang.String"/>
|
|
<param name="shouldFail" type="boolean"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@deprecated Applications should rather use {@link #killTask(TaskAttemptID, boolean)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounters" return="org.apache.hadoop.mapred.Counters"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets the counters for this job.
|
|
|
|
@return the counters for this job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskDiagnostics" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets the diagnostic messages for a given task attempt.
|
|
@param taskid
|
|
@return the list of diagnostic messages for the task
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>RunningJob</code> is the user-interface to query for details on a
|
|
running Map-Reduce job.
|
|
|
|
<p>Clients can get hold of <code>RunningJob</code> via the {@link JobClient}
|
|
and then query the running-job for details such as name, configuration,
|
|
progress etc.</p>
|
|
|
|
@see JobClient]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.RunningJob -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat -->
|
|
<class name="SequenceFileAsBinaryInputFormat" extends="org.apache.hadoop.mapred.SequenceFileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileAsBinaryInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[InputFormat reading keys, values from SequenceFiles in binary (raw)
|
|
format.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader -->
|
|
<class name="SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<constructor name="SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="createKey" return="org.apache.hadoop.io.BytesWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="createValue" return="org.apache.hadoop.io.BytesWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getKeyClassName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Retrieve the name of the key class for this SequenceFile.
|
|
@see org.apache.hadoop.io.SequenceFile.Reader#getKeyClassName]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getValueClassName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Retrieve the name of the value class for this SequenceFile.
|
|
@see org.apache.hadoop.io.SequenceFile.Reader#getValueClassName]]>
|
|
</doc>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.BytesWritable"/>
|
|
<param name="val" type="org.apache.hadoop.io.BytesWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read raw bytes from a SequenceFile.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Return the progress within the input split
|
|
@return 0.0 to 1.0 of the input byte range]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Read records from a SequenceFile as binary (raw) bytes.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat -->
|
|
<class name="SequenceFileAsBinaryOutputFormat" extends="org.apache.hadoop.mapred.SequenceFileOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileAsBinaryOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setSequenceFileOutputKeyClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the key class for the {@link SequenceFile}
|
|
<p>This allows the user to specify the key class to be different
|
|
from the actual class ({@link BytesWritable}) used for writing </p>
|
|
|
|
@param conf the {@link JobConf} to modify
|
|
@param theClass the SequenceFile output key class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSequenceFileOutputValueClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the value class for the {@link SequenceFile}
|
|
<p>This allows the user to specify the value class to be different
|
|
from the actual class ({@link BytesWritable}) used for writing </p>
|
|
|
|
@param conf the {@link JobConf} to modify
|
|
@param theClass the SequenceFile output value class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSequenceFileOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the key class for the {@link SequenceFile}
|
|
|
|
@return the key class of the {@link SequenceFile}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSequenceFileOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the value class for the {@link SequenceFile}
|
|
|
|
@return the value class of the {@link SequenceFile}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link OutputFormat} that writes keys, values to
|
|
{@link SequenceFile}s in binary(raw) format]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat.WritableValueBytes -->
|
|
<class name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.SequenceFile.ValueBytes"/>
|
|
<constructor name="SequenceFileAsBinaryOutputFormat.WritableValueBytes"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" type="org.apache.hadoop.io.BytesWritable"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="value" type="org.apache.hadoop.io.BytesWritable"/>
|
|
</method>
|
|
<method name="writeUncompressedBytes"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="outStream" type="java.io.DataOutputStream"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="writeCompressedBytes"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="outStream" type="java.io.DataOutputStream"/>
|
|
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getSize" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Inner class used for appendRaw]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat.WritableValueBytes -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileAsTextInputFormat -->
|
|
<class name="SequenceFileAsTextInputFormat" extends="org.apache.hadoop.mapred.SequenceFileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileAsTextInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class is similar to SequenceFileInputFormat, except it generates SequenceFileAsTextRecordReader
|
|
which converts the input keys and values to their String forms by calling toString() method.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileAsTextInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileAsTextRecordReader -->
|
|
<class name="SequenceFileAsTextRecordReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<constructor name="SequenceFileAsTextRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="createKey" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="createValue" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.Text"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read key/value pair in a line.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class converts the input keys and values to their String forms by calling toString()
|
|
method. This class plays the same role for SequenceFileAsTextInputFormat
|
|
that LineRecordReader plays for TextInputFormat.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileAsTextRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter -->
|
|
<class name="SequenceFileInputFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFilter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create a record reader for the given split
|
|
@param split file split
|
|
@param job job configuration
|
|
@param reporter reporter who sends report to task tracker
|
|
@return RecordReader]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setFilterClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="filterClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[set the filter class
|
|
|
|
@param conf application configuration
|
|
@param filterClass filter class]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A class that allows a map/red job to work on a sample of sequence files.
|
|
The sample is decided by the filter class set by the job.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter -->
|
|
<!-- start interface org.apache.hadoop.mapred.SequenceFileInputFilter.Filter -->
|
|
<interface name="SequenceFileInputFilter.Filter" abstract="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.conf.Configurable"/>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[filter function
|
|
Decide if a record should be filtered or not
|
|
@param key record key
|
|
@return true if a record is accepted; return false otherwise]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[filter interface]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.SequenceFileInputFilter.Filter -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase -->
|
|
<class name="SequenceFileInputFilter.FilterBase" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.SequenceFileInputFilter.Filter"/>
|
|
<constructor name="SequenceFileInputFilter.FilterBase"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[base class for Filters]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.MD5Filter -->
|
|
<class name="SequenceFileInputFilter.MD5Filter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFilter.MD5Filter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setFrequency"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="frequency" type="int"/>
|
|
<doc>
|
|
<![CDATA[set the filtering frequency in configuration
|
|
|
|
@param conf configuration
|
|
@param frequency filtering frequency]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[configure the filter according to configuration
|
|
|
|
@param conf configuration]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Filtering method
|
|
If MD5(key) % frequency==0, return true; otherwise return false
|
|
@see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]>
|
|
</doc>
|
|
</method>
|
|
<field name="MD5_LEN" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class returns a set of records by examining the MD5 digest of its
|
|
key against a filtering frequency <i>f</i>. The filtering criterion is
|
|
MD5(key) % f == 0.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.MD5Filter -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.PercentFilter -->
|
|
<class name="SequenceFileInputFilter.PercentFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFilter.PercentFilter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setFrequency"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="frequency" type="int"/>
|
|
<doc>
|
|
<![CDATA[set the frequency and stores it in conf
|
|
@param conf configuration
|
|
@param frequency filtering frequency]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[configure the filter by checking the configuration
|
|
|
|
@param conf configuration]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Filtering method
|
|
If record# % frequency==0, return true; otherwise return false
|
|
@see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class returns a percentage of records
|
|
The percentage is determined by a filtering frequency <i>f</i> using
|
|
the criteria record# % f == 0.
|
|
For example, if the frequency is 10, one out of 10 records is returned.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.PercentFilter -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.RegexFilter -->
|
|
<class name="SequenceFileInputFilter.RegexFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFilter.RegexFilter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setPattern"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="regex" type="java.lang.String"/>
|
|
<exception name="PatternSyntaxException" type="java.util.regex.PatternSyntaxException"/>
|
|
<doc>
|
|
<![CDATA[Define the filtering regex and stores it in conf
|
|
@param conf where the regex is set
|
|
@param regex regex used as a filter]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[configure the Filter by checking the configuration]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Filtering method
|
|
If key matches the regex, return true; otherwise return false
|
|
@see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Records filter by matching key to regex]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.RegexFilter -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFormat -->
|
|
<class name="SequenceFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat}
|
|
instead.">
|
|
<constructor name="SequenceFileInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link InputFormat} for {@link SequenceFile}s.
|
|
@deprecated Use
|
|
{@link org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat}
|
|
instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileOutputFormat -->
|
|
<class name="SequenceFileOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat}
|
|
instead.">
|
|
<constructor name="SequenceFileOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getReaders" return="org.apache.hadoop.io.SequenceFile.Reader[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="dir" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Open the output generated by this format.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputCompressionType" return="org.apache.hadoop.io.SequenceFile.CompressionType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link CompressionType} for the output {@link SequenceFile}.
|
|
@param conf the {@link JobConf}
|
|
@return the {@link CompressionType} for the output {@link SequenceFile},
|
|
defaulting to {@link CompressionType#RECORD}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputCompressionType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="style" type="org.apache.hadoop.io.SequenceFile.CompressionType"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link CompressionType} for the output {@link SequenceFile}.
|
|
@param conf the {@link JobConf} to modify
|
|
@param style the {@link CompressionType} for the output
|
|
{@link SequenceFile}]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link OutputFormat} that writes {@link SequenceFile}s.
|
|
@deprecated Use
|
|
{@link org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat}
|
|
instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.SequenceFileRecordReader -->
|
|
<class name="SequenceFileRecordReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<constructor name="SequenceFileRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="getKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The class of key that must be passed to {@link
|
|
#next(Object, Object)}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The class of value that must be passed to {@link
|
|
#next(Object, Object)}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createKey" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="createValue" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getCurrentValue"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="value" type="java.lang.Object"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Return the progress within the input split
|
|
@return 0.0 to 1.0 of the input byte range]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="seek"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="pos" type="long"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="conf" type="org.apache.hadoop.conf.Configuration"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A {@link RecordReader} for {@link SequenceFile}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SequenceFileRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.SkipBadRecords -->
|
|
<class name="SkipBadRecords" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SkipBadRecords"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getAttemptsToStartSkipping" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Get the number of Task attempts AFTER which skip mode
|
|
will be kicked off. When skip mode is kicked off, the
|
|
task reports the range of records which it will process
|
|
next to the TaskTracker. So that on failures, TT knows which
|
|
ones are possibly the bad records. On further executions,
|
|
those are skipped.
|
|
Default value is 2.
|
|
|
|
@param conf the configuration
|
|
@return attemptsToStartSkipping no of task attempts]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setAttemptsToStartSkipping"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="attemptsToStartSkipping" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the number of Task attempts AFTER which skip mode
|
|
will be kicked off. When skip mode is kicked off, the
|
|
task reports the range of records which it will process
|
|
next to the TaskTracker. So that on failures, TT knows which
|
|
ones are possibly the bad records. On further executions,
|
|
those are skipped.
|
|
Default value is 2.
|
|
|
|
@param conf the configuration
|
|
@param attemptsToStartSkipping no of task attempts]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getAutoIncrMapperProcCount" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Get the flag which if set to true,
|
|
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS} is incremented
|
|
by MapRunner after invoking the map function. This value must be set to
|
|
false for applications which process the records asynchronously
|
|
or buffer the input records. For example streaming.
|
|
In such cases applications should increment this counter on their own.
|
|
Default value is true.
|
|
|
|
@param conf the configuration
|
|
@return <code>true</code> if auto increment
|
|
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}.
|
|
<code>false</code> otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setAutoIncrMapperProcCount"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="autoIncr" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set the flag which if set to true,
|
|
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS} is incremented
|
|
by MapRunner after invoking the map function. This value must be set to
|
|
false for applications which process the records asynchronously
|
|
or buffer the input records. For example streaming.
|
|
In such cases applications should increment this counter on their own.
|
|
Default value is true.
|
|
|
|
@param conf the configuration
|
|
@param autoIncr whether to auto increment
|
|
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getAutoIncrReducerProcCount" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Get the flag which if set to true,
|
|
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS} is incremented
|
|
by framework after invoking the reduce function. This value must be set to
|
|
false for applications which process the records asynchronously
|
|
or buffer the input records. For example streaming.
|
|
In such cases applications should increment this counter on their own.
|
|
Default value is true.
|
|
|
|
@param conf the configuration
|
|
@return <code>true</code> if auto increment
|
|
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}.
|
|
<code>false</code> otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setAutoIncrReducerProcCount"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="autoIncr" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set the flag which if set to true,
|
|
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS} is incremented
|
|
by framework after invoking the reduce function. This value must be set to
|
|
false for applications which process the records asynchronously
|
|
or buffer the input records. For example streaming.
|
|
In such cases applications should increment this counter on their own.
|
|
Default value is true.
|
|
|
|
@param conf the configuration
|
|
@param autoIncr whether to auto increment
|
|
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSkipOutputPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Get the directory to which skipped records are written. By default it is
|
|
the sub directory of the output _logs directory.
|
|
User can stop writing skipped records by setting the value null.
|
|
|
|
@param conf the configuration.
|
|
@return path skip output directory. Null is returned if this is not set
|
|
and output directory is also not set.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSkipOutputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Set the directory to which skipped records are written. By default it is
|
|
the sub directory of the output _logs directory.
|
|
User can stop writing skipped records by setting the value null.
|
|
|
|
@param conf the configuration.
|
|
@param path skip output directory path]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapperMaxSkipRecords" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Get the number of acceptable skip records surrounding the bad record PER
|
|
bad record in mapper. The number includes the bad record as well.
|
|
To turn the feature of detection/skipping of bad records off, set the
|
|
value to 0.
|
|
The framework tries to narrow down the skipped range by retrying
|
|
until this threshold is met OR all attempts get exhausted for this task.
|
|
Set the value to Long.MAX_VALUE to indicate that framework need not try to
|
|
narrow down. Whatever records(depends on application) get skipped are
|
|
acceptable.
|
|
Default value is 0.
|
|
|
|
@param conf the configuration
|
|
@return maxSkipRecs acceptable skip records.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapperMaxSkipRecords"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="maxSkipRecs" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the number of acceptable skip records surrounding the bad record PER
|
|
bad record in mapper. The number includes the bad record as well.
|
|
To turn the feature of detection/skipping of bad records off, set the
|
|
value to 0.
|
|
The framework tries to narrow down the skipped range by retrying
|
|
until this threshold is met OR all attempts get exhausted for this task.
|
|
Set the value to Long.MAX_VALUE to indicate that framework need not try to
|
|
narrow down. Whatever records(depends on application) get skipped are
|
|
acceptable.
|
|
Default value is 0.
|
|
|
|
@param conf the configuration
|
|
@param maxSkipRecs acceptable skip records.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReducerMaxSkipGroups" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[Get the number of acceptable skip groups surrounding the bad group PER
|
|
bad group in reducer. The number includes the bad group as well.
|
|
To turn the feature of detection/skipping of bad groups off, set the
|
|
value to 0.
|
|
The framework tries to narrow down the skipped range by retrying
|
|
until this threshold is met OR all attempts get exhausted for this task.
|
|
Set the value to Long.MAX_VALUE to indicate that framework need not try to
|
|
narrow down. Whatever groups(depends on application) get skipped are
|
|
acceptable.
|
|
Default value is 0.
|
|
|
|
@param conf the configuration
|
|
@return maxSkipGrps acceptable skip groups.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setReducerMaxSkipGroups"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="maxSkipGrps" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the number of acceptable skip groups surrounding the bad group PER
|
|
bad group in reducer. The number includes the bad group as well.
|
|
To turn the feature of detection/skipping of bad groups off, set the
|
|
value to 0.
|
|
The framework tries to narrow down the skipped range by retrying
|
|
until this threshold is met OR all attempts get exhausted for this task.
|
|
Set the value to Long.MAX_VALUE to indicate that framework need not try to
|
|
narrow down. Whatever groups(depends on application) get skipped are
|
|
acceptable.
|
|
Default value is 0.
|
|
|
|
@param conf the configuration
|
|
@param maxSkipGrps acceptable skip groups.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="COUNTER_GROUP" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Special counters which are written by the application and are
|
|
used by the framework for detecting bad records. For detecting bad records
|
|
these counters must be incremented by the application.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="COUNTER_MAP_PROCESSED_RECORDS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Number of processed map records.
|
|
@see SkipBadRecords#getAutoIncrMapperProcCount(Configuration)]]>
|
|
</doc>
|
|
</field>
|
|
<field name="COUNTER_REDUCE_PROCESSED_GROUPS" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Number of processed reduce groups.
|
|
@see SkipBadRecords#getAutoIncrReducerProcCount(Configuration)]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Utility class for skip bad records functionality. It contains various
|
|
settings related to skipping of bad records.
|
|
|
|
<p>Hadoop provides an optional mode of execution in which the bad records
|
|
are detected and skipped in further attempts.</p>
|
|
|
|
<p>This feature can be used when map/reduce tasks crash deterministically on
|
|
certain input. This happens due to bugs in the map/reduce function. The usual
|
|
course would be to fix these bugs. But sometimes this is not possible;
|
|
perhaps the bug is in third party libraries for which the source code is
|
|
not available. Due to this, the task never reaches completion even with
|
|
multiple attempts and complete data for that task is lost.</p>
|
|
|
|
<p>With this feature, only a small portion of data is lost surrounding
|
|
the bad record, which may be acceptable for some user applications.
|
|
see {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}</p>
|
|
|
|
<p>The skipping mode gets kicked off after a certain number of failures
|
|
see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}</p>
|
|
|
|
<p>In the skipping mode, the map/reduce task maintains the record range which
|
|
is getting processed at all times. Before giving the input to the
|
|
map/reduce function, it sends this record range to the Task tracker.
|
|
If task crashes, the Task tracker knows which one was the last reported
|
|
range. On further attempts that range gets skipped.</p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.SkipBadRecords -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskAttemptContext -->
|
|
<class name="TaskAttemptContext" extends="org.apache.hadoop.mapreduce.TaskAttemptContext"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.TaskAttemptContext}
|
|
instead.">
|
|
<method name="getTaskAttemptID" return="org.apache.hadoop.mapred.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the taskAttemptID.
|
|
|
|
@return TaskAttemptID]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgressible" return="org.apache.hadoop.util.Progressable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobConf" return="org.apache.hadoop.mapred.JobConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="progress"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[@deprecated Use {@link org.apache.hadoop.mapreduce.TaskAttemptContext}
|
|
instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskAttemptContext -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskAttemptID -->
|
|
<class name="TaskAttemptID" extends="org.apache.hadoop.mapreduce.TaskAttemptID"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskAttemptID" type="org.apache.hadoop.mapred.TaskID, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskAttemptID object from given {@link TaskID}.
|
|
@param taskId TaskID that this task belongs to
|
|
@param id the task attempt number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskAttemptID" type="java.lang.String, int, boolean, int, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskAttemptID object from given parts.
|
|
@param jtIdentifier jobTracker identifier
|
|
@param jobId job number
|
|
@param isMap whether the tip is a map
|
|
@param taskId taskId number
|
|
@param id the task attempt number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskAttemptID"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="downgrade" return="org.apache.hadoop.mapred.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="old" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
|
|
<doc>
|
|
<![CDATA[Downgrade a new TaskAttemptID to an old one
|
|
@param old the new id
|
|
@return either old or a new TaskAttemptID constructed to match old]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskID" return="org.apache.hadoop.mapred.TaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobID" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="read" return="org.apache.hadoop.mapred.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="forName" return="org.apache.hadoop.mapred.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="str" type="java.lang.String"/>
|
|
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
|
|
<doc>
|
|
<![CDATA[Construct a TaskAttemptID object from given string
|
|
@return constructed TaskAttemptID object or null if the given String is null
|
|
@throws IllegalArgumentException if the given string is malformed]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskAttemptIDsPattern" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jtIdentifier" type="java.lang.String"/>
|
|
<param name="jobId" type="java.lang.Integer"/>
|
|
<param name="isMap" type="java.lang.Boolean"/>
|
|
<param name="taskId" type="java.lang.Integer"/>
|
|
<param name="attemptId" type="java.lang.Integer"/>
|
|
<doc>
|
|
<![CDATA[Returns a regex pattern which matches task attempt IDs. Arguments can
|
|
be given null, in which case that part of the regex will be generic.
|
|
For example to obtain a regex matching <i>all task attempt IDs</i>
|
|
of <i>any jobtracker</i>, in <i>any job</i>, of the <i>first
|
|
map task</i>, we would use :
|
|
<pre>
|
|
TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
|
|
</pre>
|
|
which will return :
|
|
<pre> "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" </pre>
|
|
@param jtIdentifier jobTracker identifier, or null
|
|
@param jobId job number, or null
|
|
@param isMap whether the tip is a map, or null
|
|
@param taskId taskId number, or null
|
|
@param attemptId the task attempt number, or null
|
|
@return a regex pattern matching TaskAttemptIDs]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[TaskAttemptID represents the immutable and unique identifier for
|
|
a task attempt. Each task attempt is one particular instance of a Map or
|
|
Reduce Task identified by its TaskID.
|
|
|
|
TaskAttemptID consists of 2 parts. First part is the
|
|
{@link TaskID}, that this TaskAttemptID belongs to.
|
|
Second part is the task attempt number. <br>
|
|
An example TaskAttemptID is :
|
|
<code>attempt_200707121733_0003_m_000005_0</code> , which represents the
|
|
zeroth task attempt for the fifth map task in the third job
|
|
running at the jobtracker started at <code>200707121733</code>.
|
|
<p>
|
|
Applications should never construct or parse TaskAttemptID strings
|
|
, but rather use appropriate constructors or {@link #forName(String)}
|
|
method.
|
|
|
|
@see JobID
|
|
@see TaskID]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskAttemptID -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskCompletionEvent -->
|
|
<class name="TaskCompletionEvent" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="TaskCompletionEvent"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Default constructor for Writable.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskCompletionEvent" type="int, org.apache.hadoop.mapred.TaskAttemptID, int, boolean, org.apache.hadoop.mapred.TaskCompletionEvent.Status, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructor. eventId should be created externally and incremented
|
|
per event for each job.
|
|
@param eventId event id; it should be unique and assigned
|
|
incrementally, starting from 0.
|
|
@param taskId task id
|
|
@param status task's status
|
|
@param taskTrackerHttp task tracker's host:port for http.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getEventId" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns event Id.
|
|
@return event id]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskId" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="use {@link #getTaskAttemptId()} instead.">
|
|
<doc>
|
|
<![CDATA[Returns task id.
|
|
@return task id
|
|
@deprecated use {@link #getTaskAttemptId()} instead.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskAttemptId" return="org.apache.hadoop.mapred.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns task id.
|
|
@return task id]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskStatus" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the task's completion status, e.g. Status.SUCCEEDED or Status.FAILED.
|
|
@return task status]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskTrackerHttp" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[http location of the tasktracker where this task ran.
|
|
@return http location of tasktracker user logs]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskRunTime" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns time (in millisec) the task took to complete.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskRunTime"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskCompletionTime" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the task completion time
|
|
@param taskCompletionTime time (in millisec) the task took to complete]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setEventId"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="eventId" type="int"/>
|
|
<doc>
|
|
<![CDATA[set event Id. should be assigned incrementally starting from 0.
|
|
@param eventId]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskId"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="use {@link #setTaskID(TaskAttemptID)} instead.">
|
|
<param name="taskId" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Sets task id.
|
|
@param taskId
|
|
@deprecated use {@link #setTaskID(TaskAttemptID)} instead.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<doc>
|
|
<![CDATA[Sets task id.
|
|
@param taskId]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="status" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"/>
|
|
<doc>
|
|
<![CDATA[Set task status.
|
|
@param status]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskTrackerHttp"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskTrackerHttp" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set task tracker http location.
|
|
@param taskTrackerHttp]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="isMapTask" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="idWithinJob" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="EMPTY_ARRAY" type="org.apache.hadoop.mapred.TaskCompletionEvent[]"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This is used to track task completion events on
|
|
job tracker.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskCompletionEvent -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskCompletionEvent.Status -->
|
|
<class name="TaskCompletionEvent.Status" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<field name="FAILED" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="KILLED" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="SUCCEEDED" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="OBSOLETE" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="TIPFAILED" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskCompletionEvent.Status -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskGraphServlet -->
|
|
<class name="TaskGraphServlet" extends="javax.servlet.http.HttpServlet"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskGraphServlet"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="doGet"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="request" type="javax.servlet.http.HttpServletRequest"/>
|
|
<param name="response" type="javax.servlet.http.HttpServletResponse"/>
|
|
<exception name="ServletException" type="javax.servlet.ServletException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="width" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[width of the graph w/o margins]]>
|
|
</doc>
|
|
</field>
|
|
<field name="height" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[height of the graph w/o margins]]>
|
|
</doc>
|
|
</field>
|
|
<field name="ymargin" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[margin space on y axis]]>
|
|
</doc>
|
|
</field>
|
|
<field name="xmargin" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[margin space on x axis]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[The servlet that outputs svg graphics for map / reduce task
|
|
statuses]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskGraphServlet -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskID -->
|
|
<class name="TaskID" extends="org.apache.hadoop.mapreduce.TaskID"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, boolean, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskID object from given {@link JobID}.
|
|
@param jobId JobID that this tip belongs to
|
|
@param isMap whether the tip is a map
|
|
@param id the tip number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskID" type="java.lang.String, int, boolean, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskInProgressId object from given parts.
|
|
@param jtIdentifier jobTracker identifier
|
|
@param jobId job number
|
|
@param isMap whether the tip is a map
|
|
@param id the tip number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskID"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="downgrade" return="org.apache.hadoop.mapred.TaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="old" type="org.apache.hadoop.mapreduce.TaskID"/>
|
|
<doc>
|
|
<![CDATA[Downgrade a new TaskID to an old one
|
|
@param old a new or old TaskID
|
|
@return either old or a new TaskID built to match old]]>
|
|
</doc>
|
|
</method>
|
|
<method name="read" return="org.apache.hadoop.mapred.TaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getJobID" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getTaskIDsPattern" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jtIdentifier" type="java.lang.String"/>
|
|
<param name="jobId" type="java.lang.Integer"/>
|
|
<param name="isMap" type="java.lang.Boolean"/>
|
|
<param name="taskId" type="java.lang.Integer"/>
|
|
<doc>
|
|
<![CDATA[Returns a regex pattern which matches task IDs. Arguments can
|
|
be given null, in which case that part of the regex will be generic.
|
|
For example to obtain a regex matching <i>the first map task</i>
|
|
of <i>any jobtracker</i>, of <i>any job</i>, we would use :
|
|
<pre>
|
|
TaskID.getTaskIDsPattern(null, null, true, 1);
|
|
</pre>
|
|
which will return :
|
|
<pre> "task_[^_]*_[0-9]*_m_000001*" </pre>
|
|
@param jtIdentifier jobTracker identifier, or null
|
|
@param jobId job number, or null
|
|
@param isMap whether the tip is a map, or null
|
|
@param taskId taskId number, or null
|
|
@return a regex pattern matching TaskIDs]]>
|
|
</doc>
|
|
</method>
|
|
<method name="forName" return="org.apache.hadoop.mapred.TaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="str" type="java.lang.String"/>
|
|
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[TaskID represents the immutable and unique identifier for
|
|
a Map or Reduce Task. Each TaskID encompasses multiple attempts made to
|
|
execute the Map or Reduce Task, each of which is uniquely identified by
|
|
their TaskAttemptID.
|
|
|
|
TaskID consists of 3 parts. First part is the {@link JobID}, that this
|
|
TaskInProgress belongs to. Second part of the TaskID is either 'm' or 'r'
|
|
representing whether the task is a map task or a reduce task.
|
|
And the third part is the task number. <br>
|
|
An example TaskID is :
|
|
<code>task_200707121733_0003_m_000005</code> , which represents the
|
|
fifth map task in the third job running at the jobtracker
|
|
started at <code>200707121733</code>.
|
|
<p>
|
|
Applications should never construct or parse TaskID strings
|
|
, but rather use appropriate constructors or {@link #forName(String)}
|
|
method.
|
|
|
|
@see JobID
|
|
@see TaskAttemptID]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskID -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskLog -->
|
|
<class name="TaskLog" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskLog"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getTaskLogFile" return="java.io.File"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="filter" type="org.apache.hadoop.mapred.TaskLog.LogName"/>
|
|
</method>
|
|
<method name="getRealTaskLogFileLocation" return="java.io.File"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="filter" type="org.apache.hadoop.mapred.TaskLog.LogName"/>
|
|
</method>
|
|
<method name="getIndexFile" return="java.io.File"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getIndexFile" return="java.io.File"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="java.lang.String"/>
|
|
<param name="isCleanup" type="boolean"/>
|
|
</method>
|
|
<method name="syncLogs"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="firstTaskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="syncLogs"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="firstTaskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="isCleanup" type="boolean"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="cleanup"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="logsRetainHours" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Purge old user logs.
|
|
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskLogLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the desired maximum length of task's logs.
|
|
@param conf the job to look in
|
|
@return the number of bytes to cap the log files at]]>
|
|
</doc>
|
|
</method>
|
|
<method name="captureOutAndError" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cmd" type="java.util.List"/>
|
|
<param name="stdoutFilename" type="java.io.File"/>
|
|
<param name="stderrFilename" type="java.io.File"/>
|
|
<param name="tailLength" type="long"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Wrap a command in a shell to capture stdout and stderr to files.
|
|
If the tailLength is 0, the entire output will be saved.
|
|
@param cmd The command and the arguments that should be run
|
|
@param stdoutFilename The filename that stdout should be saved to
|
|
@param stderrFilename The filename that stderr should be saved to
|
|
@param tailLength The length of the tail to be saved.
|
|
@return the modified command that should be run]]>
|
|
</doc>
|
|
</method>
|
|
<method name="captureOutAndError" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="setup" type="java.util.List"/>
|
|
<param name="cmd" type="java.util.List"/>
|
|
<param name="stdoutFilename" type="java.io.File"/>
|
|
<param name="stderrFilename" type="java.io.File"/>
|
|
<param name="tailLength" type="long"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Wrap a command in a shell to capture stdout and stderr to files.
|
|
Setup commands such as setting memory limit can be passed which
|
|
will be executed before exec.
|
|
If the tailLength is 0, the entire output will be saved.
|
|
@param setup The setup commands for the execed process.
|
|
@param cmd The command and the arguments that should be run
|
|
@param stdoutFilename The filename that stdout should be saved to
|
|
@param stderrFilename The filename that stderr should be saved to
|
|
@param tailLength The length of the tail to be saved.
|
|
@return the modified command that should be run]]>
|
|
</doc>
|
|
</method>
|
|
<method name="captureOutAndError" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="setup" type="java.util.List"/>
|
|
<param name="cmd" type="java.util.List"/>
|
|
<param name="stdoutFilename" type="java.io.File"/>
|
|
<param name="stderrFilename" type="java.io.File"/>
|
|
<param name="tailLength" type="long"/>
|
|
<param name="pidFileName" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Wrap a command in a shell to capture stdout and stderr to files.
|
|
Setup commands such as setting memory limit can be passed which
|
|
will be executed before exec.
|
|
If the tailLength is 0, the entire output will be saved.
|
|
@param setup The setup commands for the execed process.
|
|
@param cmd The command and the arguments that should be run
|
|
@param stdoutFilename The filename that stdout should be saved to
|
|
@param stderrFilename The filename that stderr should be saved to
|
|
@param tailLength The length of the tail to be saved.
|
|
@param pidFileName The name of the pid-file
|
|
@return the modified command that should be run]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addCommand" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cmd" type="java.util.List"/>
|
|
<param name="isExecutable" type="boolean"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add quotes to each of the command strings and
|
|
return as a single string
|
|
@param cmd The command to be quoted
|
|
@param isExecutable makes shell path if the first
|
|
argument is executable
|
|
@return the quoted string.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="captureDebugOut" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cmd" type="java.util.List"/>
|
|
<param name="debugoutFilename" type="java.io.File"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Wrap a command in a shell to capture debug script's
|
|
stdout and stderr to debugout.
|
|
@param cmd The command and the arguments that should be run
|
|
@param debugoutFilename The filename that stdout and stderr
|
|
should be saved to.
|
|
@return the modified command that should be run
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A simple logger to handle the task-specific user logs.
|
|
This class uses the system property <code>hadoop.log.dir</code>.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskLog -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskLog.LogName -->
|
|
<class name="TaskLog.LogName" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapred.TaskLog.LogName[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapred.TaskLog.LogName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<field name="STDOUT" type="org.apache.hadoop.mapred.TaskLog.LogName"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Log on the stdout of the task.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="STDERR" type="org.apache.hadoop.mapred.TaskLog.LogName"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Log on the stderr of the task.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="SYSLOG" type="org.apache.hadoop.mapred.TaskLog.LogName"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Log on the map-reduce system logs of the task.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="PROFILE" type="org.apache.hadoop.mapred.TaskLog.LogName"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The java profiler information.]]>
|
|
</doc>
|
|
</field>
|
|
<field name="DEBUGOUT" type="org.apache.hadoop.mapred.TaskLog.LogName"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Log the debug script's stdout]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[The filter for userlogs.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskLog.LogName -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskLogAppender -->
|
|
<class name="TaskLogAppender" extends="org.apache.log4j.FileAppender"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskLogAppender"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="activateOptions"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="append"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="event" type="org.apache.log4j.spi.LoggingEvent"/>
|
|
</method>
|
|
<method name="flush"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getTaskId" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Getter/Setter methods for log4j.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setTaskId"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getTotalLogFileSize" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setTotalLogFileSize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="logSize" type="long"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A simple log4j-appender for the task child's
|
|
map-reduce system logs.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskLogAppender -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskLogServlet -->
|
|
<class name="TaskLogServlet" extends="javax.servlet.http.HttpServlet"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskLogServlet"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getTaskLogUrl" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskTrackerHostName" type="java.lang.String"/>
|
|
<param name="httpPort" type="java.lang.String"/>
|
|
<param name="taskAttemptID" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Construct the taskLogUrl
|
|
@param taskTrackerHostName
|
|
@param httpPort
|
|
@param taskAttemptID
|
|
@return the taskLogUrl]]>
|
|
</doc>
|
|
</method>
|
|
<method name="doGet"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="request" type="javax.servlet.http.HttpServletRequest"/>
|
|
<param name="response" type="javax.servlet.http.HttpServletResponse"/>
|
|
<exception name="ServletException" type="javax.servlet.ServletException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the logs via http.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A servlet that is run by the TaskTrackers to provide the task logs via http.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskLogServlet -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskReport -->
|
|
<class name="TaskReport" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="TaskReport"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getTaskId" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="use {@link #getTaskID()} instead">
|
|
<doc>
|
|
<![CDATA[@deprecated use {@link #getTaskID()} instead]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskID" return="org.apache.hadoop.mapred.TaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The id of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The amount completed, between zero and one.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getState" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The most recent state, reported by a {@link Reporter}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDiagnostics" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[A list of error messages.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounters" return="org.apache.hadoop.mapred.Counters"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[A table of counters.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentStatus" return="org.apache.hadoop.mapred.TIPStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The current status]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getFinishTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get finish time of task.
|
|
@return 0 if finish time was not set, else the finish time.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getStartTime" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get start time of task.
|
|
@return 0 if start time was not set, else start time.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSuccessfulAttempt"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="t" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<doc>
|
|
<![CDATA[Set the successful attempt ID of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSuccessfulTaskAttempt" return="org.apache.hadoop.mapred.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the attempt ID that took this task to completion]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setRunningTaskAttempts"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="runningAttempts" type="java.util.Collection"/>
|
|
<doc>
|
|
<![CDATA[Set the running attempt(s) of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRunningTaskAttempts" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the running task attempt IDs for this task]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A report on the state of a task.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskReport -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskTracker -->
|
|
<class name="TaskTracker" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.MRConstants"/>
|
|
<implements name="org.apache.hadoop.mapred.TaskUmbilicalProtocol"/>
|
|
<implements name="java.lang.Runnable"/>
|
|
<constructor name="TaskTracker" type="org.apache.hadoop.mapred.JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Start with the local machine name, and the default JobTracker]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getTaskTrackerInstrumentation" return="org.apache.hadoop.mapred.TaskTrackerInstrumentation"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getProtocolVersion" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="protocol" type="java.lang.String"/>
|
|
<param name="clientVersion" type="long"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getInstrumentationClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="setInstrumentationClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="t" type="java.lang.Class"/>
|
|
</method>
|
|
<method name="cleanupStorage"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Removes all contents of temporary storage. Called upon
|
|
startup, to remove any leftovers from previous run.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="shutdown"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close down the TaskTracker and all its components. We must also shutdown
|
|
any running tasks or threads, and cleanup disk space. A new TaskTracker
|
|
within the same process space might be restarted, so everything must be
|
|
clean.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobClient" return="org.apache.hadoop.mapred.InterTrackerProtocol"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The connection to the JobTracker, used by the TaskRunner
|
|
for locating remote files.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskTrackerReportAddress" return="java.net.InetSocketAddress"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the socket address to which the tasktracker is bound.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJvmManagerInstance" return="org.apache.hadoop.mapred.JvmManager"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The server retry loop.
|
|
This while-loop attempts to connect to the JobTracker. It only
|
|
loops when the old TaskTracker has gone bad (its state is
|
|
stale somehow) and we need to reinitialize everything.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTask" return="org.apache.hadoop.mapred.JvmTask"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jvmId" type="org.apache.hadoop.mapred.JVMId"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Called upon startup by the child process, to fetch Task data.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="statusUpdate" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="taskStatus" type="org.apache.hadoop.mapred.TaskStatus"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Called periodically to report Task progress, from 0.0 to 1.0.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reportDiagnosticInfo"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="info" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Called when the task dies before completion, and we want to report back
|
|
diagnostic info]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reportNextRecordRange"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="range" type="org.apache.hadoop.mapred.SortedRanges.Range"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="ping" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Child checking to see if we're alive. Normally does nothing.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="commitPending"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="taskStatus" type="org.apache.hadoop.mapred.TaskStatus"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Task is reporting that it is in commit_pending
|
|
and it is waiting for the commit Response]]>
|
|
</doc>
|
|
</method>
|
|
<method name="canCommit" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<doc>
|
|
<![CDATA[Child checking whether it can commit]]>
|
|
</doc>
|
|
</method>
|
|
<method name="done"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[The task is done.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="shuffleError"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="message" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[A reduce-task failed to shuffle the map-outputs. Kill the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="fsError"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="message" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[A child task had a local filesystem error. Kill the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapCompletionEvents" return="org.apache.hadoop.mapred.MapTaskCompletionEventsUpdate"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
|
|
<param name="fromEventId" type="int"/>
|
|
<param name="maxLocs" type="int"/>
|
|
<param name="id" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="mapOutputLost"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
|
|
<param name="errorMsg" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[A completed map task's output has been lost.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isIdle" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Is this task tracker idle?
|
|
@return has this task tracker finished and cleaned up all of its tasks?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="argv" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
<doc>
|
|
<![CDATA[Start the TaskTracker, point toward the indicated JobTracker]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isTaskMemoryManagerEnabled" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Is the TaskMemoryManager Enabled on this system?
|
|
@return true if enabled, false otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskMemoryManager" return="org.apache.hadoop.mapred.TaskMemoryManagerThread"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="MR_CLIENTTRACE_FORMAT" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="ClientTraceLog" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[TaskTracker is a process that starts and tracks MR Tasks
|
|
in a networked environment. It contacts the JobTracker
|
|
for Task assignments and reporting results.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskTracker -->
|
|
<!-- start class org.apache.hadoop.mapred.TaskTracker.MapOutputServlet -->
|
|
<class name="TaskTracker.MapOutputServlet" extends="javax.servlet.http.HttpServlet"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskTracker.MapOutputServlet"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="doGet"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="request" type="javax.servlet.http.HttpServletRequest"/>
|
|
<param name="response" type="javax.servlet.http.HttpServletResponse"/>
|
|
<exception name="ServletException" type="javax.servlet.ServletException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class is used in TaskTracker's Jetty to serve the map outputs
|
|
to other nodes.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TaskTracker.MapOutputServlet -->
|
|
<!-- start class org.apache.hadoop.mapred.TextInputFormat -->
|
|
<class name="TextInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.TextInputFormat}
|
|
instead.">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<constructor name="TextInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="isSplitable" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="file" type="org.apache.hadoop.fs.Path"/>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines.
|
|
Either linefeed or carriage-return is used to signal end of line. Keys are
|
|
the position in the file, and values are the line of text.
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.TextInputFormat}
|
|
instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TextInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.TextOutputFormat -->
|
|
<class name="TextOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link org.apache.hadoop.mapreduce.lib.output.TextOutputFormat} instead.">
|
|
<constructor name="TextOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link OutputFormat} that writes plain text files.
|
|
@deprecated Use
|
|
{@link org.apache.hadoop.mapreduce.lib.output.TextOutputFormat} instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TextOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.TextOutputFormat.LineRecordWriter -->
|
|
<class name="TextOutputFormat.LineRecordWriter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordWriter"/>
|
|
<constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="out" type="java.io.DataOutputStream"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TextOutputFormat.LineRecordWriter -->
|
|
<!-- start class org.apache.hadoop.mapred.TIPStatus -->
|
|
<class name="TIPStatus" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="false" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapred.TIPStatus[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapred.TIPStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<field name="PENDING" type="org.apache.hadoop.mapred.TIPStatus"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="RUNNING" type="org.apache.hadoop.mapred.TIPStatus"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="COMPLETE" type="org.apache.hadoop.mapred.TIPStatus"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="KILLED" type="org.apache.hadoop.mapred.TIPStatus"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FAILED" type="org.apache.hadoop.mapred.TIPStatus"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[The states of a {@link TaskInProgress} as seen by the JobTracker.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.TIPStatus -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred.jobcontrol">
|
|
<!-- start class org.apache.hadoop.mapred.jobcontrol.Job -->
|
|
<class name="Job" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Job" type="org.apache.hadoop.mapred.JobConf, java.util.ArrayList"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Construct a job.
|
|
@param jobConf a mapred job configuration representing a job to be executed.
|
|
@param dependingJobs an array of jobs the current job depends on]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="Job" type="org.apache.hadoop.mapred.JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Construct a job.
|
|
|
|
@param jobConf mapred job configuration representing a job to be executed.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getJobName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the job name of this job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the job name for this job.
|
|
@param jobName the job name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobID" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the job ID of this job assigned by JobControl]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="id" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the job ID for this job.
|
|
@param id the job ID]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapredJobID" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="use {@link #getAssignedJobID()} instead">
|
|
<doc>
|
|
<![CDATA[@return the mapred ID of this job
|
|
@deprecated use {@link #getAssignedJobID()} instead]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapredJobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="use {@link #setAssignedJobID(JobID)} instead">
|
|
<param name="mapredJobID" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the mapred ID for this job.
|
|
@param mapredJobID the mapred job ID for this job.
|
|
@deprecated use {@link #setAssignedJobID(JobID)} instead]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getAssignedJobID" return="org.apache.hadoop.mapred.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the mapred ID of this job as assigned by the
|
|
mapred framework.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setAssignedJobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="mapredJobID" type="org.apache.hadoop.mapred.JobID"/>
|
|
<doc>
|
|
<![CDATA[Set the mapred ID for this job as assigned by the
|
|
mapred framework.
|
|
@param mapredJobID the mapred job ID for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobConf" return="org.apache.hadoop.mapred.JobConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the mapred job conf of this job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Set the mapred job conf for this job.
|
|
@param jobConf the mapred job conf for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getState" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the state of this job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setState"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="state" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the state for this job.
|
|
@param state the new state for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMessage" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the message of this job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMessage"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="message" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the message for this job.
|
|
@param message the message for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobClient" return="org.apache.hadoop.mapred.JobClient"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the job client of this job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDependingJobs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the depending jobs of this job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addDependingJob" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="dependingJob" type="org.apache.hadoop.mapred.jobcontrol.Job"/>
|
|
<doc>
|
|
<![CDATA[Add a job to this job's dependency list. Dependent jobs can only be added while a Job
|
|
is waiting to run, not during or afterwards.
|
|
|
|
@param dependingJob Job that this Job depends on.
|
|
@return <tt>true</tt> if the Job was added.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isCompleted" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return true if this job is in a complete state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isReady" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return true if this job is in READY state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="submit"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Submit this job to mapred. The state becomes RUNNING if submission
|
|
is successful, FAILED otherwise.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="SUCCESS" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="WAITING" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="RUNNING" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="READY" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="FAILED" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="DEPENDENT_FAILED" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class encapsulates a MapReduce job and its dependency. It monitors
|
|
the states of the depending jobs and updates the state of this job.
|
|
A job starts in the WAITING state. If it does not have any depending jobs, or
|
|
all of the depending jobs are in SUCCESS state, then the job state will become
|
|
READY. If any depending jobs fail, the job will fail too.
|
|
When in READY state, the job can be submitted to Hadoop for execution, with
|
|
the state changing into RUNNING state. From RUNNING state, the job can get into
|
|
SUCCESS or FAILED state, depending on the status of the job execution.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.jobcontrol.Job -->
|
|
<!-- start class org.apache.hadoop.mapred.jobcontrol.JobControl -->
|
|
<class name="JobControl" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.lang.Runnable"/>
|
|
<constructor name="JobControl" type="java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Construct a job control for a group of jobs.
|
|
@param groupName a name identifying this group]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getWaitingJobs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the jobs in the waiting state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRunningJobs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the jobs in the running state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReadyJobs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the jobs in the ready state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSuccessfulJobs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the jobs in the success state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getFailedJobs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="addJob" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="aJob" type="org.apache.hadoop.mapred.jobcontrol.Job"/>
|
|
<doc>
|
|
<![CDATA[Add a new job.
|
|
@param aJob the new job]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addJobs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobs" type="java.util.Collection"/>
|
|
<doc>
|
|
<![CDATA[Add a collection of jobs
|
|
|
|
@param jobs the collection of jobs to add]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getState" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the thread state]]>
|
|
</doc>
|
|
</method>
|
|
<method name="stop"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[set the thread state to STOPPING so that the
|
|
thread will stop when it wakes up.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="suspend"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[suspend the running thread]]>
|
|
</doc>
|
|
</method>
|
|
<method name="resume"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[resume the suspended thread]]>
|
|
</doc>
|
|
</method>
|
|
<method name="allFinished" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The main loop for the thread.
|
|
The loop does the following:
|
|
Check the states of the running jobs
|
|
Update the states of waiting jobs
|
|
Submit the jobs in ready state]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class encapsulates a set of MapReduce jobs and their dependencies. It tracks
|
|
the states of the jobs by placing them into different tables according to their
|
|
states.
|
|
|
|
This class provides APIs for the client app to add a job to the group and to get
|
|
the jobs in the group in different states. When a
|
|
job is added, an ID unique to the group is assigned to the job.
|
|
|
|
This class has a thread that submits jobs when they become ready, monitors the
|
|
states of the running jobs, and updates the states of jobs based on the state changes
|
|
of their depending jobs' states. The class provides APIs for suspending/resuming
|
|
the thread, and for stopping the thread.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.jobcontrol.JobControl -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred.join">
|
|
<!-- start class org.apache.hadoop.mapred.join.ArrayListBackedIterator -->
|
|
<class name="ArrayListBackedIterator" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
|
|
<constructor name="ArrayListBackedIterator"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="ArrayListBackedIterator" type="java.util.ArrayList"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="item" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class provides an implementation of ResetableIterator. The
|
|
implementation uses an {@link java.util.ArrayList} to store elements
|
|
added to it, replaying them as requested.
|
|
Prefer {@link StreamBackedIterator}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.ArrayListBackedIterator -->
|
|
<!-- start interface org.apache.hadoop.mapred.join.ComposableInputFormat -->
|
|
<interface name="ComposableInputFormat" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.InputFormat"/>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.join.ComposableRecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Refinement of InputFormat requiring implementors to provide
|
|
ComposableRecordReader instead of RecordReader.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.join.ComposableInputFormat -->
|
|
<!-- start interface org.apache.hadoop.mapred.join.ComposableRecordReader -->
|
|
<interface name="ComposableRecordReader" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<implements name="java.lang.Comparable"/>
|
|
<method name="id" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the position in the collector this class occupies.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key" return="org.apache.hadoop.io.WritableComparable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the key this RecordReader would supply on a call to next(K,V)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Clone the key at the head of this RecordReader into the object provided.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns true if the stream is not empty, but provides no guarantee that
|
|
a call to next(K,V) will succeed.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="skip"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/>
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[While key-value pairs from this RecordReader match the given key, register
|
|
them with the JoinCollector provided.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Additional operations required of a RecordReader to participate in a join.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.join.ComposableRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.join.CompositeInputFormat -->
|
|
<class name="CompositeInputFormat" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ComposableInputFormat"/>
|
|
<constructor name="CompositeInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setFormat"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Interpret a given string as a composite expression.
|
|
{@code
|
|
func ::= <ident>([<func>,]*<func>)
|
|
func ::= tbl(<class>,"<path>")
|
|
class ::= @see java.lang.Class#forName(java.lang.String)
|
|
path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String)
|
|
}
|
|
Reads expression from the <tt>mapred.join.expr</tt> property and
|
|
user-supplied join types from <tt>mapred.join.define.<ident></tt>
|
|
types. Paths supplied to <tt>tbl</tt> are given as input paths to the
|
|
InputFormat class listed.
|
|
@see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addDefaults"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Adds the default set of identifiers to the parser.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="numSplits" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Build a CompositeInputSplit from the child InputFormats by assigning the
|
|
ith split from each child to the ith composite split.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.join.ComposableRecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Construct a CompositeRecordReader for the children of this InputFormat
|
|
as defined in the init expression.
|
|
The outermost join need only be composable, not necessarily a composite.
|
|
Mandating TupleWritable isn't strictly correct.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compose" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="inf" type="java.lang.Class"/>
|
|
<param name="path" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Convenience method for constructing composite formats.
|
|
Given InputFormat class (inf), path (p) return:
|
|
{@code tbl(<inf>, <p>) }]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compose" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="op" type="java.lang.String"/>
|
|
<param name="inf" type="java.lang.Class"/>
|
|
<param name="path" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[Convenience method for constructing composite formats.
|
|
Given operation (op), Object class (inf), set of paths (p) return:
|
|
{@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compose" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="op" type="java.lang.String"/>
|
|
<param name="inf" type="java.lang.Class"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path[]"/>
|
|
<doc>
|
|
<![CDATA[Convenience method for constructing composite formats.
|
|
Given operation (op), Object class (inf), set of paths (p) return:
|
|
{@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An InputFormat capable of performing joins over a set of data sources sorted
|
|
and partitioned the same way.
|
|
@see #setFormat
|
|
|
|
A user may define new join types by setting the property
|
|
<tt>mapred.join.define.<ident></tt> to a classname. In the expression
|
|
<tt>mapred.join.expr</tt>, the identifier will be assumed to be a
|
|
ComposableRecordReader.
|
|
<tt>mapred.join.keycomparator</tt> can be a classname used to compare keys
|
|
in the join.
|
|
@see JoinRecordReader
|
|
@see MultiFilterRecordReader]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.CompositeInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.join.CompositeInputSplit -->
|
|
<class name="CompositeInputSplit" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.InputSplit"/>
|
|
<constructor name="CompositeInputSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="CompositeInputSplit" type="int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="s" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add an InputSplit to this collection.
|
|
@throws IOException If capacity was not specified during construction
|
|
or if capacity has been reached.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="get" return="org.apache.hadoop.mapred.InputSplit"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<doc>
|
|
<![CDATA[Get ith child InputSplit.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Return the aggregate length of all child InputSplits currently added.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the length of ith child InputSplit.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Collect a set of hosts from all child InputSplits.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocation" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[getLocations from ith InputSplit.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Write splits in the following format.
|
|
{@code
|
|
<count><class1><class2>...<classn><split1><split2>...<splitn>
|
|
}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}
|
|
@throws IOException If the child InputSplit cannot be read, typically
|
|
for failing access checks.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This InputSplit contains a set of child InputSplits. Any InputSplit inserted
|
|
into this collection must have a public default constructor.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.CompositeInputSplit -->
|
|
<!-- start class org.apache.hadoop.mapred.join.CompositeRecordReader -->
|
|
<class name="CompositeRecordReader" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.conf.Configurable"/>
|
|
<constructor name="CompositeRecordReader" type="int, int, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create a RecordReader with <tt>capacity</tt> children to position
|
|
<tt>id</tt> in the parent reader.
|
|
The id of a root CompositeRecordReader is -1 by convention, but relying
|
|
on this is not recommended.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="combine" return="boolean"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="srcs" type="java.lang.Object[]"/>
|
|
<param name="value" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
</method>
|
|
<method name="id" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the position in the collector this class occupies.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordReaderQueue" return="java.util.PriorityQueue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return sorted list of RecordReaders for this composite.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getComparator" return="org.apache.hadoop.io.WritableComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return comparator defining the ordering for RecordReaders in this
|
|
composite.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="rr" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add a RecordReader to this collection.
|
|
The id() of a RecordReader determines where in the Tuple its
|
|
entry will appear. Adding RecordReaders with the same id has
|
|
undefined behavior.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key" return="org.apache.hadoop.io.WritableComparable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the key for the current join or the value at the top of the
|
|
RecordReader heap.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Clone the key at the top of this RR into the given object.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return true if it is possible that this could emit more values.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="skip"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Pass skip key to child RRs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Obtain an iterator over the child RRs apropos of the value type
|
|
ultimately emitted from this join.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/>
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[If key provided matches that of this Composite, give JoinCollector
|
|
iterator over values it may emit.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="fillJoinCollector"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="iterkey" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For all child RRs offering the key provided, obtain an iterator
|
|
at that position in the JoinCollector.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
|
|
<doc>
|
|
<![CDATA[Implement Comparable contract (compare key of join or head of heap
|
|
with that of another).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createKey" return="org.apache.hadoop.io.WritableComparable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a new key value common to all child RRs.
|
|
@throws ClassCastException if key classes differ.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createInternalValue" return="org.apache.hadoop.mapred.join.TupleWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a value to be used internally for joins.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Unsupported (returns zero in all cases).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close all child RRs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Report progress as the minimum of all child RR progress.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"
|
|
transient="false" volatile="false"
|
|
static="false" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="kids" type="org.apache.hadoop.mapred.join.ComposableRecordReader[]"
|
|
transient="false" volatile="false"
|
|
static="false" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A RecordReader that can effect joins of RecordReaders sharing a common key
|
|
type and partitioning.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.CompositeRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.join.InnerJoinRecordReader -->
|
|
<class name="InnerJoinRecordReader" extends="org.apache.hadoop.mapred.join.JoinRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="combine" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="srcs" type="java.lang.Object[]"/>
|
|
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<doc>
|
|
<![CDATA[Return true iff the tuple is full (all data sources contain this key).]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Full inner join.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.InnerJoinRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.join.JoinRecordReader -->
|
|
<class name="JoinRecordReader" extends="org.apache.hadoop.mapred.join.CompositeRecordReader"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
|
|
<constructor name="JoinRecordReader" type="int, org.apache.hadoop.mapred.JobConf, int, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<param name="value" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Emit the next set of key, value pairs as defined by the child
|
|
RecordReaders and operation associated with this composite RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValue" return="org.apache.hadoop.mapred.join.TupleWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return an iterator wrapping the JoinCollector.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Base class for Composite joins returning Tuples of arbitrary Writables.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.JoinRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.join.JoinRecordReader.JoinDelegationIterator -->
|
|
<class name="JoinRecordReader.JoinDelegationIterator" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
|
|
<constructor name="JoinRecordReader.JoinDelegationIterator"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="item" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Since the JoinCollector is effecting our operation, we need only
|
|
provide an iterator proxy wrapping its operation.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.JoinRecordReader.JoinDelegationIterator -->
|
|
<!-- start class org.apache.hadoop.mapred.join.MultiFilterRecordReader -->
|
|
<class name="MultiFilterRecordReader" extends="org.apache.hadoop.mapred.join.CompositeRecordReader"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
|
|
<constructor name="MultiFilterRecordReader" type="int, org.apache.hadoop.mapred.JobConf, int, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="emit" return="org.apache.hadoop.io.Writable"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For each tuple emitted, return a value (typically one of the values
|
|
in the tuple).
|
|
Modifying the Writables in the tuple is permitted and unlikely to affect
|
|
join behavior in most cases, but it is not recommended. It's safer to
|
|
clone first.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="combine" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="srcs" type="java.lang.Object[]"/>
|
|
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<doc>
|
|
<![CDATA[Default implementation offers {@link #emit} every Tuple from the
|
|
collector (the outer join of child RRs).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<param name="value" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValue" return="org.apache.hadoop.io.Writable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return an iterator returning a single value from the tuple.
|
|
@see MultiFilterDelegationIterator]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Base class for Composite join returning values derived from multiple
|
|
sources, but generally not tuples.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.MultiFilterRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.join.MultiFilterRecordReader.MultiFilterDelegationIterator -->
|
|
<class name="MultiFilterRecordReader.MultiFilterDelegationIterator" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
|
|
<constructor name="MultiFilterRecordReader.MultiFilterDelegationIterator"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="item" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Proxy the JoinCollector, but include callback to emit.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.MultiFilterRecordReader.MultiFilterDelegationIterator -->
|
|
<!-- start class org.apache.hadoop.mapred.join.OuterJoinRecordReader -->
|
|
<class name="OuterJoinRecordReader" extends="org.apache.hadoop.mapred.join.JoinRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="combine" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="srcs" type="java.lang.Object[]"/>
|
|
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<doc>
|
|
<![CDATA[Emit everything from the collector.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Full outer join.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.OuterJoinRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.join.OverrideRecordReader -->
|
|
<class name="OverrideRecordReader" extends="org.apache.hadoop.mapred.join.MultiFilterRecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="emit" return="org.apache.hadoop.io.Writable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
|
|
<doc>
|
|
<![CDATA[Emit the value with the highest position in the tuple.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="fillJoinCollector"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="iterkey" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Instead of filling the JoinCollector with iterators from all
|
|
data sources, fill only the rightmost for this key.
|
|
This not only saves space by discarding the other sources, but
|
|
it also emits the number of key-value pairs in the preferred
|
|
RecordReader instead of repeating that stream n times, where
|
|
n is the cardinality of the cross product of the discarded
|
|
streams for the given key.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Prefer the "rightmost" data source for this key.
|
|
For example, <tt>override(S1,S2,S3)</tt> will prefer values
|
|
from S3 over S2, and values from S2 over S1 for all keys
|
|
emitted from all sources.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.OverrideRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.join.Parser -->
|
|
<class name="Parser" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Parser"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<doc>
|
|
<![CDATA[Very simple shift-reduce parser for join expressions.
|
|
|
|
This should be sufficient for the user extension permitted now, but ought to
|
|
be replaced with a parser generator if more complex grammars are supported.
|
|
In particular, this "shift-reduce" parser has no states. Each set
|
|
of formals requires a different internal node type, which is responsible for
|
|
interpreting the list of tokens it receives. This is sufficient for the
|
|
current grammar, but it has several annoying properties that might inhibit
|
|
extension. In particular, parentheses are always function calls; an
|
|
algebraic or filter grammar would not only require a node type, but must
|
|
also work around the internals of this parser.
|
|
|
|
For most other cases, adding classes to the hierarchy, particularly by
|
|
extending JoinRecordReader and MultiFilterRecordReader, is fairly
|
|
straightforward. One need only override the relevant method(s) (usually only
|
|
{@link CompositeRecordReader#combine}) and include a property to map its
|
|
value to an identifier in the parser.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.Parser -->
|
|
<!-- start class org.apache.hadoop.mapred.join.Parser.Node -->
|
|
<class name="Parser.Node" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ComposableInputFormat"/>
|
|
<constructor name="Parser.Node" type="java.lang.String"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="addIdentifier"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="ident" type="java.lang.String"/>
|
|
<param name="mcstrSig" type="java.lang.Class[]"/>
|
|
<param name="nodetype" type="java.lang.Class"/>
|
|
<param name="cl" type="java.lang.Class"/>
|
|
<exception name="NoSuchMethodException" type="java.lang.NoSuchMethodException"/>
|
|
<doc>
|
|
<![CDATA[For a given identifier, add a mapping to the nodetype for the parse
|
|
tree and to the ComposableRecordReader to be created, including the
|
|
formals required to invoke the constructor.
|
|
The nodetype and constructor signature should be filled in from the
|
|
child node.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="id" type="int"/>
|
|
</method>
|
|
<method name="setKeyComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="cmpcl" type="java.lang.Class"/>
|
|
</method>
|
|
<field name="rrCstrMap" type="java.util.Map"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="id" type="int"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="ident" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="cmpcl" type="java.lang.Class"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.Parser.Node -->
|
|
<!-- start class org.apache.hadoop.mapred.join.Parser.NodeToken -->
|
|
<class name="Parser.NodeToken" extends="org.apache.hadoop.mapred.join.Parser.Token"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getNode" return="org.apache.hadoop.mapred.join.Parser.Node"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.Parser.NodeToken -->
|
|
<!-- start class org.apache.hadoop.mapred.join.Parser.NumToken -->
|
|
<class name="Parser.NumToken" extends="org.apache.hadoop.mapred.join.Parser.Token"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Parser.NumToken" type="double"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getNum" return="double"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.Parser.NumToken -->
|
|
<!-- start class org.apache.hadoop.mapred.join.Parser.StrToken -->
|
|
<class name="Parser.StrToken" extends="org.apache.hadoop.mapred.join.Parser.Token"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Parser.StrToken" type="org.apache.hadoop.mapred.join.Parser.TType, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getStr" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.Parser.StrToken -->
|
|
<!-- start class org.apache.hadoop.mapred.join.Parser.Token -->
|
|
<class name="Parser.Token" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getType" return="org.apache.hadoop.mapred.join.Parser.TType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getNode" return="org.apache.hadoop.mapred.join.Parser.Node"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getNum" return="double"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getStr" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Tagged-union type for tokens from the join expression.
|
|
@see Parser.TType]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.Parser.Token -->
|
|
<!-- start class org.apache.hadoop.mapred.join.Parser.TType -->
|
|
<class name="Parser.TType" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapred.join.Parser.TType[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapred.join.Parser.TType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<field name="CIF" type="org.apache.hadoop.mapred.join.Parser.TType"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="IDENT" type="org.apache.hadoop.mapred.join.Parser.TType"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="COMMA" type="org.apache.hadoop.mapred.join.Parser.TType"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="LPAREN" type="org.apache.hadoop.mapred.join.Parser.TType"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="RPAREN" type="org.apache.hadoop.mapred.join.Parser.TType"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="QUOT" type="org.apache.hadoop.mapred.join.Parser.TType"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="NUM" type="org.apache.hadoop.mapred.join.Parser.TType"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.Parser.TType -->
|
|
<!-- start interface org.apache.hadoop.mapred.join.ResetableIterator -->
|
|
<interface name="ResetableIterator" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[True if a call to next may return a value. This method may report false
|
|
positives, but not false negatives.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Assign next value to the actual parameter.
|
|
It is required that elements added to a ResetableIterator be returned in
|
|
the same order after a call to {@link #reset} (FIFO).
|
|
|
|
Note that a call to this may fail for nested joins (i.e. more elements
|
|
available, but none satisfying the constraints of the join)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Assign last value returned to the actual parameter.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Set iterator to return to the start of its range. Must be called after
|
|
calling {@link #add} to avoid a ConcurrentModificationException.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="item" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add an element to the collection of elements to iterate over.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close datasources and release resources. Calling methods on the iterator
|
|
after calling close has undefined behavior.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Close datasources, but do not release internal resources. Calling this
|
|
method should permit the object to be reused with a different datasource.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This defines an interface to a stateful Iterator that can replay elements
|
|
added to it directly.
|
|
Note that this does not extend {@link java.util.Iterator}.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.join.ResetableIterator -->
|
|
<!-- start class org.apache.hadoop.mapred.join.ResetableIterator.EMPTY -->
|
|
<class name="ResetableIterator.EMPTY" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
|
|
<constructor name="ResetableIterator.EMPTY"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="item" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.ResetableIterator.EMPTY -->
|
|
<!-- start class org.apache.hadoop.mapred.join.StreamBackedIterator -->
|
|
<class name="StreamBackedIterator" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
|
|
<constructor name="StreamBackedIterator"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="replay" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="add"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="item" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="clear"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class provides an implementation of ResetableIterator. This
|
|
implementation uses a byte array to store elements added to it.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.StreamBackedIterator -->
|
|
<!-- start class org.apache.hadoop.mapred.join.TupleWritable -->
|
|
<class name="TupleWritable" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<implements name="java.lang.Iterable"/>
|
|
<constructor name="TupleWritable"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create an empty tuple with no allocated storage for writables.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TupleWritable" type="org.apache.hadoop.io.Writable[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Initialize tuple with storage; unknown whether any of them contain
|
|
"written" values.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="has" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<doc>
|
|
<![CDATA[Return true if tuple has an element at the position provided.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="get" return="org.apache.hadoop.io.Writable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<doc>
|
|
<![CDATA[Get ith Writable from Tuple.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="size" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The number of children in this Tuple.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="iterator" return="java.util.Iterator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return an iterator over the elements in this tuple.
|
|
Note that this doesn't flatten the tuple; one may receive tuples
|
|
from this iterator.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Convert Tuple to String as in the following.
|
|
<tt>[<child1>,<child2>,...,<childn>]</tt>]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Writes each Writable to <code>out</code>.
|
|
TupleWritable format:
|
|
{@code
|
|
<count><type1><type2>...<typen><obj1><obj2>...<objn>
|
|
}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Writable type storing multiple {@link org.apache.hadoop.io.Writable}s.
|
|
|
|
This is *not* a general-purpose tuple type. In almost all cases, users are
|
|
encouraged to implement their own serializable types, which can perform
|
|
better validation and provide more efficient encodings than this class is
|
|
capable. TupleWritable relies on the join framework for type safety and
|
|
assumes its instances will rarely be persisted, assumptions not only
|
|
incompatible with, but contrary to the general case.
|
|
|
|
@see org.apache.hadoop.io.Writable]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.TupleWritable -->
|
|
<!-- start class org.apache.hadoop.mapred.join.WrappedRecordReader -->
|
|
<class name="WrappedRecordReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
|
|
<method name="id" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key" return="org.apache.hadoop.io.WritableComparable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the key at the head of this RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="key"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="qkey" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Clone the key at the head of this RR into the object supplied.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return true if the RR- including the k,v pair stored in this object-
|
|
is exhausted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="skip"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read the next k,v pair into the head of this object; return true iff
|
|
the RR and this are exhausted.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="accept"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/>
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add an iterator to the collector at the position occupied by this
|
|
RecordReader over the values in this stream paired with the key
|
|
provided (ie register a stream of values from this source matching K
|
|
with a collector).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<param name="value" type="org.apache.hadoop.io.Writable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Write key-value pair at the head of this stream to the objects provided;
|
|
get next key-value pair from proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createKey" return="org.apache.hadoop.io.WritableComparable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Request new key from proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValue" return="org.apache.hadoop.io.Writable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Request new value from proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Request progress from proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Request position from proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Forward close request to proxied RR.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
|
|
<doc>
|
|
<![CDATA[Implement Comparable contract (compare key at head of proxied RR
|
|
with that of another).]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Return true iff compareTo(other) returns 0.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Proxy class for a RecordReader participating in the join framework.
|
|
This class keeps track of the "head" key-value pair for the
|
|
provided RecordReader and keeps a store of values matching a key when
|
|
this source is participating in a join.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.join.WrappedRecordReader -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred.lib">
|
|
<!-- start class org.apache.hadoop.mapred.lib.ChainMapper -->
|
|
<class name="ChainMapper" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<constructor name="ChainMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructor.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addMapper"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="klass" type="java.lang.Class"/>
|
|
<param name="inputKeyClass" type="java.lang.Class"/>
|
|
<param name="inputValueClass" type="java.lang.Class"/>
|
|
<param name="outputKeyClass" type="java.lang.Class"/>
|
|
<param name="outputValueClass" type="java.lang.Class"/>
|
|
<param name="byValue" type="boolean"/>
|
|
<param name="mapperConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Adds a Mapper class to the chain job's JobConf.
|
|
<p/>
|
|
It has to be specified how key and values are passed from one element of
|
|
the chain to the next, by value or by reference. If a Mapper leverages the
|
|
assumed semantics that the key and values are not modified by the collector
|
|
'by value' must be used. If the Mapper does not expect this semantics, as
|
|
an optimization to avoid serialization and deserialization 'by reference'
|
|
can be used.
|
|
<p/>
|
|
For the added Mapper the configuration given for it,
|
|
<code>mapperConf</code>, has precedence over the job's JobConf. This
|
|
precedence is in effect when the task is running.
|
|
<p/>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainMapper, this is done by the addMapper for the last mapper in the chain.
|
|
<p/>
|
|
|
|
@param job job's JobConf to add the Mapper class.
|
|
@param klass the Mapper class to add.
|
|
@param inputKeyClass mapper input key class.
|
|
@param inputValueClass mapper input value class.
|
|
@param outputKeyClass mapper output key class.
|
|
@param outputValueClass mapper output value class.
|
|
@param byValue indicates if key/values should be passed by value
|
|
to the next Mapper in the chain, if any.
|
|
@param mapperConf a JobConf with the configuration for the Mapper
|
|
class. It is recommended to use a JobConf without default values using the
|
|
<code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Configures the ChainMapper and all the Mappers in the chain.
|
|
<p/>
|
|
If this method is overridden, <code>super.configure(...)</code> should be
|
|
invoked at the beginning of the overriding method.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Chains the <code>map(...)</code> methods of the Mappers in the chain.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Closes the ChainMapper and all the Mappers in the chain.
|
|
<p/>
|
|
If this method is overridden, <code>super.close()</code> should be
|
|
invoked at the end of the overriding method.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The ChainMapper class allows the use of multiple Mapper classes within a single
|
|
Map task.
|
|
<p/>
|
|
The Mapper classes are invoked in a chained (or piped) fashion, the output of
|
|
the first becomes the input of the second, and so on until the last Mapper,
|
|
the output of the last Mapper will be written to the task's output.
|
|
<p/>
|
|
The key functionality of this feature is that the Mappers in the chain do not
|
|
need to be aware that they are executed in a chain. This enables having
|
|
reusable specialized Mappers that can be combined to perform composite
|
|
operations within a single task.
|
|
<p/>
|
|
Special care has to be taken when creating chains that the key/values output
|
|
by a Mapper are valid for the following Mapper in the chain. It is assumed
|
|
all Mappers and the Reduce in the chain use matching output and input key and
|
|
value classes as no conversion is done by the chaining code.
|
|
<p/>
|
|
Using the ChainMapper and the ChainReducer classes it is possible to compose
|
|
Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An
|
|
immediate benefit of this pattern is a dramatic reduction in disk IO.
|
|
<p/>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainMapper, this is done by the addMapper for the last mapper in the chain.
|
|
<p/>
|
|
ChainMapper usage pattern:
|
|
<p/>
|
|
<pre>
|
|
...
|
|
conf.setJobName("chain");
|
|
conf.setInputFormat(TextInputFormat.class);
|
|
conf.setOutputFormat(TextOutputFormat.class);
|
|
<p/>
|
|
JobConf mapAConf = new JobConf(false);
|
|
...
|
|
ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class,
|
|
Text.class, Text.class, true, mapAConf);
|
|
<p/>
|
|
JobConf mapBConf = new JobConf(false);
|
|
...
|
|
ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class,
|
|
LongWritable.class, Text.class, false, mapBConf);
|
|
<p/>
|
|
JobConf reduceConf = new JobConf(false);
|
|
...
|
|
ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class,
|
|
Text.class, Text.class, true, reduceConf);
|
|
<p/>
|
|
ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class,
|
|
LongWritable.class, Text.class, false, null);
|
|
<p/>
|
|
ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class,
|
|
LongWritable.class, LongWritable.class, true, null);
|
|
<p/>
|
|
FileInputFormat.setInputPaths(conf, inDir);
|
|
FileOutputFormat.setOutputPath(conf, outDir);
|
|
...
|
|
<p/>
|
|
JobClient jc = new JobClient(conf);
|
|
RunningJob job = jc.submitJob(conf);
|
|
...
|
|
</pre>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.ChainMapper -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.ChainReducer -->
|
|
<class name="ChainReducer" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Reducer"/>
|
|
<constructor name="ChainReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructor.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="setReducer"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="klass" type="java.lang.Class"/>
|
|
<param name="inputKeyClass" type="java.lang.Class"/>
|
|
<param name="inputValueClass" type="java.lang.Class"/>
|
|
<param name="outputKeyClass" type="java.lang.Class"/>
|
|
<param name="outputValueClass" type="java.lang.Class"/>
|
|
<param name="byValue" type="boolean"/>
|
|
<param name="reducerConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Sets the Reducer class to the chain job's JobConf.
|
|
<p/>
|
|
It has to be specified how key and values are passed from one element of
|
|
the chain to the next, by value or by reference. If a Reducer leverages the
|
|
assumed semantics that the key and values are not modified by the collector
|
|
'by value' must be used. If the Reducer does not expect this semantics, as
|
|
an optimization to avoid serialization and deserialization 'by reference'
|
|
can be used.
|
|
<p/>
|
|
For the added Reducer the configuration given for it,
|
|
<code>reducerConf</code>, has precedence over the job's JobConf. This
|
|
precedence is in effect when the task is running.
|
|
<p/>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainReducer, this is done by the setReducer or the addMapper for the last
|
|
element in the chain.
|
|
|
|
@param job job's JobConf to add the Reducer class.
|
|
@param klass the Reducer class to add.
|
|
@param inputKeyClass reducer input key class.
|
|
@param inputValueClass reducer input value class.
|
|
@param outputKeyClass reducer output key class.
|
|
@param outputValueClass reducer output value class.
|
|
@param byValue indicates if key/values should be passed by value
|
|
to the next Mapper in the chain, if any.
|
|
@param reducerConf a JobConf with the configuration for the Reducer
|
|
class. It is recommended to use a JobConf without default values using the
|
|
<code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addMapper"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="klass" type="java.lang.Class"/>
|
|
<param name="inputKeyClass" type="java.lang.Class"/>
|
|
<param name="inputValueClass" type="java.lang.Class"/>
|
|
<param name="outputKeyClass" type="java.lang.Class"/>
|
|
<param name="outputValueClass" type="java.lang.Class"/>
|
|
<param name="byValue" type="boolean"/>
|
|
<param name="mapperConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Adds a Mapper class to the chain job's JobConf.
|
|
<p/>
|
|
It has to be specified how key and values are passed from one element of
|
|
the chain to the next, by value or by reference. If a Mapper leverages the
|
|
assumed semantics that the key and values are not modified by the collector
|
|
'by value' must be used. If the Mapper does not expect this semantics, as
|
|
an optimization to avoid serialization and deserialization 'by reference'
|
|
can be used.
|
|
<p/>
|
|
For the added Mapper the configuration given for it,
|
|
<code>mapperConf</code>, has precedence over the job's JobConf. This
|
|
precedence is in effect when the task is running.
|
|
<p/>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainMapper, this is done by the addMapper for the last mapper in the chain
|
|
.
|
|
|
|
@param job chain job's JobConf to add the Mapper class.
|
|
@param klass the Mapper class to add.
|
|
@param inputKeyClass mapper input key class.
|
|
@param inputValueClass mapper input value class.
|
|
@param outputKeyClass mapper output key class.
|
|
@param outputValueClass mapper output value class.
|
|
@param byValue indicates if key/values should be passed by value
|
|
to the next Mapper in the chain, if any.
|
|
@param mapperConf a JobConf with the configuration for the Mapper
|
|
class. It is recommended to use a JobConf without default values using the
|
|
<code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Configures the ChainReducer, the Reducer and all the Mappers in the chain.
|
|
<p/>
|
|
If this method is overridden, <code>super.configure(...)</code> should be
|
|
invoked at the beginning of the overriding method.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="values" type="java.util.Iterator"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Chains the <code>reduce(...)</code> method of the Reducer with the
|
|
<code>map(...) </code> methods of the Mappers in the chain.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Closes the ChainReducer, the Reducer and all the Mappers in the chain.
|
|
<p/>
|
|
If this method is overridden, <code>super.close()</code> should be
|
|
invoked at the end of the overriding method.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The ChainReducer class allows chaining multiple Mapper classes after a
|
|
Reducer within the Reducer task.
|
|
<p/>
|
|
For each record output by the Reducer, the Mapper classes are invoked in a
|
|
chained (or piped) fashion, the output of the first becomes the input of the
|
|
second, and so on until the last Mapper, the output of the last Mapper will
|
|
be written to the task's output.
|
|
<p/>
|
|
The key functionality of this feature is that the Mappers in the chain do not
|
|
need to be aware that they are executed after the Reducer or in a chain.
|
|
This enables having reusable specialized Mappers that can be combined to
|
|
perform composite operations within a single task.
|
|
<p/>
|
|
Special care has to be taken when creating chains that the key/values output
|
|
by a Mapper are valid for the following Mapper in the chain. It is assumed
|
|
all Mappers and the Reduce in the chain use matching output and input key and
|
|
value classes as no conversion is done by the chaining code.
|
|
<p/>
|
|
Using the ChainMapper and the ChainReducer classes it is possible to compose
|
|
Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An
|
|
immediate benefit of this pattern is a dramatic reduction in disk IO.
|
|
<p/>
|
|
IMPORTANT: There is no need to specify the output key/value classes for the
|
|
ChainReducer, this is done by the setReducer or the addMapper for the last
|
|
element in the chain.
|
|
<p/>
|
|
ChainReducer usage pattern:
|
|
<p/>
|
|
<pre>
|
|
...
|
|
conf.setJobName("chain");
|
|
conf.setInputFormat(TextInputFormat.class);
|
|
conf.setOutputFormat(TextOutputFormat.class);
|
|
<p/>
|
|
JobConf mapAConf = new JobConf(false);
|
|
...
|
|
ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class,
|
|
Text.class, Text.class, true, mapAConf);
|
|
<p/>
|
|
JobConf mapBConf = new JobConf(false);
|
|
...
|
|
ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class,
|
|
LongWritable.class, Text.class, false, mapBConf);
|
|
<p/>
|
|
JobConf reduceConf = new JobConf(false);
|
|
...
|
|
ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class,
|
|
Text.class, Text.class, true, reduceConf);
|
|
<p/>
|
|
ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class,
|
|
LongWritable.class, Text.class, false, null);
|
|
<p/>
|
|
ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class,
|
|
LongWritable.class, LongWritable.class, true, null);
|
|
<p/>
|
|
FileInputFormat.setInputPaths(conf, inDir);
|
|
FileOutputFormat.setOutputPath(conf, outDir);
|
|
...
|
|
<p/>
|
|
JobClient jc = new JobClient(conf);
|
|
RunningJob job = jc.submitJob(conf);
|
|
...
|
|
</pre>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.ChainReducer -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.CombineFileInputFormat -->
|
|
<class name="CombineFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="CombineFileInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="setMaxSplitSize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="maxSplitSize" type="long"/>
|
|
<doc>
|
|
<![CDATA[Specify the maximum size (in bytes) of each split. Each split is
|
|
approximately equal to the specified size.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMinSplitSizeNode"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="minSplitSizeNode" type="long"/>
|
|
<doc>
|
|
<![CDATA[Specify the minimum size (in bytes) of each split per node.
|
|
This applies to data that is left over after combining data on a single
|
|
node into splits that are of maximum size specified by maxSplitSize.
|
|
This leftover data will be combined into its own split if its size
|
|
exceeds minSplitSizeNode.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMinSplitSizeRack"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="minSplitSizeRack" type="long"/>
|
|
<doc>
|
|
<![CDATA[Specify the minimum size (in bytes) of each split per rack.
|
|
This applies to data that is left over after combining data on a single
|
|
rack into splits that are of maximum size specified by maxSplitSize.
|
|
This leftover data will be combined into its own split if its size
|
|
exceeds minSplitSizeRack.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createPool"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="filters" type="java.util.List"/>
|
|
<doc>
|
|
<![CDATA[Create a new pool and add the filters to it.
|
|
A split cannot have files from different pools.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createPool"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="filters" type="org.apache.hadoop.fs.PathFilter[]"/>
|
|
<doc>
|
|
<![CDATA[Create a new pool and add the filters to it.
|
|
A pathname can satisfy any one of the specified filters.
|
|
A split cannot have files from different pools.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="numSplits" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[This is not implemented yet.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An abstract {@link org.apache.hadoop.mapred.InputFormat} that returns {@link CombineFileSplit}'s
|
|
in {@link org.apache.hadoop.mapred.InputFormat#getSplits(JobConf, int)} method.
|
|
Splits are constructed from the files under the input paths.
|
|
A split cannot have files from different pools.
|
|
Each split returned may contain blocks from different files.
|
|
If a maxSplitSize is specified, then blocks on the same node are
|
|
combined to form a single split. Blocks that are left over are
|
|
then combined with other blocks in the same rack.
|
|
If maxSplitSize is not specified, then blocks from the same rack
|
|
are combined in a single split; no attempt is made to create
|
|
node-local splits.
|
|
If the maxSplitSize is equal to the block size, then this class
|
|
is similar to the default splitting behaviour in Hadoop: each
|
|
block is a locally processed split.
|
|
Subclasses implement {@link org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit, JobConf, Reporter)}
|
|
to construct <code>RecordReader</code>'s for <code>CombineFileSplit</code>'s.
|
|
@see CombineFileSplit]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.CombineFileInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.CombineFileRecordReader -->
|
|
<class name="CombineFileRecordReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<constructor name="CombineFileRecordReader" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.lib.CombineFileSplit, org.apache.hadoop.mapred.Reporter, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[A generic RecordReader that can hand out different recordReaders
|
|
for each chunk in the CombineFileSplit.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="createKey" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="createValue" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[return the amount of data processed]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[return progress based on the amount of data processed so far.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="initNextRecordReader" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the record reader for the next chunk in this CombineFileSplit.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="split" type="org.apache.hadoop.mapred.lib.CombineFileSplit"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="jc" type="org.apache.hadoop.mapred.JobConf"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="reporter" type="org.apache.hadoop.mapred.Reporter"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="rrClass" type="java.lang.Class"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="rrConstructor" type="java.lang.reflect.Constructor"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="fs" type="org.apache.hadoop.fs.FileSystem"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="idx" type="int"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="progress" type="long"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="curReader" type="org.apache.hadoop.mapred.RecordReader"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A generic RecordReader that can hand out different recordReaders
|
|
for each chunk in a {@link CombineFileSplit}.
|
|
A CombineFileSplit can combine data chunks from multiple files.
|
|
This class allows using different RecordReaders for processing
|
|
these data chunks from different files.
|
|
@see CombineFileSplit]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.CombineFileRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.CombineFileSplit -->
|
|
<class name="CombineFileSplit" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.InputSplit"/>
|
|
<constructor name="CombineFileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[], long[], java.lang.String[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.lib.CombineFileSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Copy constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getJob" return="org.apache.hadoop.mapred.JobConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getStartOffsets" return="long[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns an array containing the start offsets of the files in the split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLengths" return="long[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns an array containing the lengths of the files in the split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOffset" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<doc>
|
|
<![CDATA[Returns the start offset of the i<sup>th</sup> Path]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<doc>
|
|
<![CDATA[Returns the length of the i<sup>th</sup> Path]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumPaths" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the number of Paths in the split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="i" type="int"/>
|
|
<doc>
|
|
<![CDATA[Returns the i<sup>th</sup> Path]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPaths" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns all the Paths in the split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Returns all the Paths where this input-split resides]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A sub-collection of input files. Unlike {@link org.apache.hadoop.mapred.FileSplit},
|
|
CombineFileSplit class does not represent a split of a file, but a split of input files
|
|
into smaller sets. A split may contain blocks from different files but all
|
|
the blocks in the same split are probably local to some rack <br>
|
|
CombineFileSplit can be used to implement {@link org.apache.hadoop.mapred.RecordReader}'s,
|
|
with reading one record per file.
|
|
@see org.apache.hadoop.mapred.FileSplit
|
|
@see CombineFileInputFormat]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.CombineFileSplit -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.DelegatingInputFormat -->
|
|
<class name="DelegatingInputFormat" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.InputFormat"/>
|
|
<constructor name="DelegatingInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="numSplits" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link InputFormat} that delegates behaviour of paths to multiple other
|
|
InputFormats.
|
|
|
|
@see MultipleInputs#addInputPath(JobConf, Path, Class, Class)]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.DelegatingInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.DelegatingMapper -->
|
|
<class name="DelegatingMapper" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<constructor name="DelegatingMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<param name="outputCollector" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Mapper} that delegates behaviour of paths to multiple other
|
|
mappers.
|
|
|
|
@see MultipleInputs#addInputPath(JobConf, Path, Class, Class)]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.DelegatingMapper -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce -->
|
|
<class name="FieldSelectionMapReduce" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<implements name="org.apache.hadoop.mapred.Reducer"/>
|
|
<constructor name="FieldSelectionMapReduce"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="val" type="java.lang.Object"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[The identity function. Input key/value pair is written directly to output.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.Text"/>
|
|
<param name="values" type="java.util.Iterator"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class implements a mapper/reducer class that can be used to perform
|
|
field selections in a manner similar to unix cut. The input data is treated
|
|
as fields separated by a user specified separator (the default value is
|
|
"\t"). The user can specify a list of fields that form the map output keys,
|
|
and a list of fields that form the map output values. If the inputformat is
|
|
TextInputFormat, the mapper will ignore the key to the map function, and the
|
|
fields are from the value only. Otherwise, the fields are the union of those
|
|
from the key and those from the value.
|
|
|
|
The field separator is under attribute "mapred.data.field.separator"
|
|
|
|
The map output field list spec is under attribute "map.output.key.value.fields.spec".
|
|
The value is expected to be like "keyFieldsSpec:valueFieldsSpec"
|
|
key/valueFieldsSpec are comma (,) separated field spec: fieldSpec,fieldSpec,fieldSpec ...
|
|
Each field spec can be a simple number (e.g. 5) specifying a specific field, or a range
|
|
(like 2-5) to specify a range of fields, or an open range (like 3-) specifying all
|
|
the fields starting from field 3. The open range field spec applies to value fields only.
|
|
They have no effect on the key fields.
|
|
|
|
Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields 4,3,0 and 1 for keys,
|
|
and use fields 6,5,1,2,3,7 and above for values.
|
|
|
|
The reduce output field list spec is under attribute "reduce.output.key.value.fields.spec".
|
|
|
|
The reducer extracts output key/value pairs in a similar manner, except that
|
|
the key is never ignored.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.HashPartitioner -->
|
|
<class name="HashPartitioner" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link org.apache.hadoop.mapreduce.lib.partition.HashPartitioner} instead.">
|
|
<implements name="org.apache.hadoop.mapred.Partitioner"/>
|
|
<constructor name="HashPartitioner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="getPartition" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<param name="numReduceTasks" type="int"/>
|
|
<doc>
|
|
<![CDATA[Use {@link Object#hashCode()} to partition.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Partition keys by their {@link Object#hashCode()}.
|
|
@deprecated Use
|
|
{@link org.apache.hadoop.mapreduce.lib.partition.HashPartitioner} instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.HashPartitioner -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.IdentityMapper -->
|
|
<class name="IdentityMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.Mapper} instead.">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<constructor name="IdentityMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="val" type="java.lang.Object"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[The identity function. Input key/value pair is written directly to
|
|
output.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Implements the identity function, mapping inputs directly to outputs.
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.IdentityMapper -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.IdentityReducer -->
|
|
<class name="IdentityReducer" extends="org.apache.hadoop.mapred.MapReduceBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.Reducer} instead.">
|
|
<implements name="org.apache.hadoop.mapred.Reducer"/>
|
|
<constructor name="IdentityReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="values" type="java.util.Iterator"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Writes all keys and values directly to output.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Performs no reduction, writing all input values directly to the output.
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.Reducer} instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.IdentityReducer -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.InputSampler -->
|
|
<class name="InputSampler" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="InputSampler" type="org.apache.hadoop.mapred.JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="writePartitionFile"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="sampler" type="org.apache.hadoop.mapred.lib.InputSampler.Sampler"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Write a partition file for the given job, using the Sampler provided.
|
|
Queries the sampler for a sample keyset, sorts by the output key
|
|
comparator, selects the keys for each rank, and writes to the destination
|
|
returned from {@link
|
|
org.apache.hadoop.mapred.lib.TotalOrderPartitioner#getPartitionFile}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
<doc>
|
|
<![CDATA[Driver for InputSampler from the command line.
|
|
Configures a JobConf instance and calls {@link #writePartitionFile}.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Utility for collecting samples and writing a partition file for
|
|
{@link org.apache.hadoop.mapred.lib.TotalOrderPartitioner}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.InputSampler -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.InputSampler.IntervalSampler -->
|
|
<class name="InputSampler.IntervalSampler" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.InputSampler.Sampler"/>
|
|
<constructor name="InputSampler.IntervalSampler" type="double"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a new IntervalSampler sampling <em>all</em> splits.
|
|
@param freq The frequency with which records will be emitted.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="InputSampler.IntervalSampler" type="double, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a new IntervalSampler.
|
|
@param freq The frequency with which records will be emitted.
|
|
@param maxSplitsSampled The maximum number of splits to examine.
|
|
@see #getSample]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getSample" return="java.lang.Object[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="inf" type="org.apache.hadoop.mapred.InputFormat"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For each split sampled, emit when the ratio of the number of records
|
|
retained to the total record count is less than the specified
|
|
frequency.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Sample from s splits at regular intervals.
|
|
Useful for sorted data.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.InputSampler.IntervalSampler -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.InputSampler.RandomSampler -->
|
|
<class name="InputSampler.RandomSampler" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.InputSampler.Sampler"/>
|
|
<constructor name="InputSampler.RandomSampler" type="double, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a new RandomSampler sampling <em>all</em> splits.
|
|
This will read every split at the client, which is very expensive.
|
|
@param freq Probability with which a key will be chosen.
|
|
@param numSamples Total number of samples to obtain from all selected
|
|
splits.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="InputSampler.RandomSampler" type="double, int, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a new RandomSampler.
|
|
@param freq Probability with which a key will be chosen.
|
|
@param numSamples Total number of samples to obtain from all selected
|
|
splits.
|
|
@param maxSplitsSampled The maximum number of splits to examine.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getSample" return="java.lang.Object[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="inf" type="org.apache.hadoop.mapred.InputFormat"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Randomize the split order, then take the specified number of keys from
|
|
each split sampled, where each key is selected with the specified
|
|
probability and possibly replaced by a subsequently selected key when
|
|
the quota of keys from that split is satisfied.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Sample from random points in the input.
|
|
General-purpose sampler. Takes numSamples / maxSplitsSampled inputs from
|
|
each split.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.InputSampler.RandomSampler -->
|
|
<!-- start interface org.apache.hadoop.mapred.lib.InputSampler.Sampler -->
|
|
<interface name="InputSampler.Sampler" abstract="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="getSample" return="java.lang.Object[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="inf" type="org.apache.hadoop.mapred.InputFormat"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For a given job, collect and return a subset of the keys from the
|
|
input data.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Interface to sample using an {@link org.apache.hadoop.mapred.InputFormat}.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.lib.InputSampler.Sampler -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.InputSampler.SplitSampler -->
|
|
<class name="InputSampler.SplitSampler" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.InputSampler.Sampler"/>
|
|
<constructor name="InputSampler.SplitSampler" type="int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a SplitSampler sampling <em>all</em> splits.
|
|
Takes the first numSamples / numSplits records from each split.
|
|
@param numSamples Total number of samples to obtain from all selected
|
|
splits.]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="InputSampler.SplitSampler" type="int, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create a new SplitSampler.
|
|
@param numSamples Total number of samples to obtain from all selected
|
|
splits.
|
|
@param maxSplitsSampled The maximum number of splits to examine.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getSample" return="java.lang.Object[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="inf" type="org.apache.hadoop.mapred.InputFormat"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[From each split sampled, take the first numSamples / numSplits records.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Samples the first n records from s splits.
|
|
Inexpensive way to sample random data.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.InputSampler.SplitSampler -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.InverseMapper -->
|
|
<class name="InverseMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.lib.map.InverseMapper}
|
|
instead.">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<constructor name="InverseMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[The inverse function. Input keys and values are swapped.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Mapper} that swaps keys and values.
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.map.InverseMapper}
|
|
instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.InverseMapper -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.KeyFieldBasedComparator -->
|
|
<class name="KeyFieldBasedComparator" extends="org.apache.hadoop.io.WritableComparator"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<constructor name="KeyFieldBasedComparator"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="compare" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="b1" type="byte[]"/>
|
|
<param name="s1" type="int"/>
|
|
<param name="l1" type="int"/>
|
|
<param name="b2" type="byte[]"/>
|
|
<param name="s2" type="int"/>
|
|
<param name="l2" type="int"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This comparator implementation provides a subset of the features provided
|
|
by the Unix/GNU Sort. In particular, the supported features are:
|
|
-n, (Sort numerically)
|
|
-r, (Reverse the result of comparison)
|
|
-k pos1[,pos2], where pos is of the form f[.c][opts], where f is the number
|
|
of the field to use, and c is the number of the first character from the
|
|
beginning of the field. Fields and character positions are numbered starting
|
|
with 1; a character position of zero in pos2 indicates the field's last
|
|
character. If '.c' is omitted from pos1, it defaults to 1 (the beginning
|
|
of the field); if omitted from pos2, it defaults to 0 (the end of the
|
|
field). opts are ordering options (any of 'nr' as described above).
|
|
We assume that the fields in the key are separated by
|
|
map.output.key.field.separator.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.KeyFieldBasedComparator -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner -->
|
|
<class name="KeyFieldBasedPartitioner" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Partitioner"/>
|
|
<constructor name="KeyFieldBasedPartitioner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="getPartition" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<param name="numReduceTasks" type="int"/>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="b" type="byte[]"/>
|
|
<param name="start" type="int"/>
|
|
<param name="end" type="int"/>
|
|
<param name="currentHash" type="int"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Defines a way to partition keys based on certain key fields (also see
|
|
{@link KeyFieldBasedComparator}).
|
|
The key specification supported is of the form -k pos1[,pos2], where,
|
|
pos is of the form f[.c][opts], where f is the number
|
|
of the key field to use, and c is the number of the first character from
|
|
the beginning of the field. Fields and character positions are numbered
|
|
starting with 1; a character position of zero in pos2 indicates the
|
|
field's last character. If '.c' is omitted from pos1, it defaults to 1
|
|
(the beginning of the field); if omitted from pos2, it defaults to 0
|
|
(the end of the field).]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.LongSumReducer -->
|
|
<class name="LongSumReducer" extends="org.apache.hadoop.mapred.MapReduceBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use {@link org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer}
|
|
instead.">
|
|
<implements name="org.apache.hadoop.mapred.Reducer"/>
|
|
<constructor name="LongSumReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="values" type="java.util.Iterator"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Reducer} that sums long values.
|
|
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer}
|
|
instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.LongSumReducer -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.MultipleInputs -->
|
|
<class name="MultipleInputs" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultipleInputs"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="addInputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="inputFormatClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Add a {@link Path} with a custom {@link InputFormat} to the list of
|
|
inputs for the map-reduce job.
|
|
|
|
@param conf The configuration of the job
|
|
@param path {@link Path} to be added to the list of inputs for the job
|
|
@param inputFormatClass {@link InputFormat} class to use for this path]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addInputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="inputFormatClass" type="java.lang.Class"/>
|
|
<param name="mapperClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Add a {@link Path} with a custom {@link InputFormat} and
|
|
{@link Mapper} to the list of inputs for the map-reduce job.
|
|
|
|
@param conf The configuration of the job
|
|
@param path {@link Path} to be added to the list of inputs for the job
|
|
@param inputFormatClass {@link InputFormat} class to use for this path
|
|
@param mapperClass {@link Mapper} class to use for this path]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class supports MapReduce jobs that have multiple input paths with
|
|
a different {@link InputFormat} and {@link Mapper} for each path]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.MultipleInputs -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.MultipleOutputFormat -->
|
|
<class name="MultipleOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultipleOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create a composite record writer that can write key/value data to different
|
|
output files
|
|
|
|
@param fs
|
|
the file system to use
|
|
@param job
|
|
the job conf for the job
|
|
@param name
|
|
the leaf file name for the output file (such as "part-00000")
|
|
@param arg3
|
|
a progressable for reporting progress.
|
|
@return a composite record writer
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateLeafFileName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Generate the leaf name for the output file name. The default behavior does
|
|
not change the leaf file name (such as part-00000)
|
|
|
|
@param name
|
|
the leaf file name for the output file
|
|
@return the given leaf file name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateFileNameForKeyValue" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Generate the output file name based on the given key and the leaf file
|
|
name. The default behavior is that the file name does not depend on the
|
|
key.
|
|
|
|
@param key
|
|
the key of the output data
|
|
@param name
|
|
the leaf file name
|
|
@return generated file name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateActualKey" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Generate the actual key from the given key/value. The default behavior is that
|
|
the actual key is equal to the given key
|
|
|
|
@param key
|
|
the key of the output data
|
|
@param value
|
|
the value of the output data
|
|
@return the actual key derived from the given key/value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateActualValue" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Generate the actual value from the given key and value. The default behavior is that
|
|
the actual value is equal to the given value
|
|
|
|
@param key
|
|
the key of the output data
|
|
@param value
|
|
the value of the output data
|
|
@return the actual value derived from the given key/value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputFileBasedOutputFileName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Generate the outfile name based on a given name and the input file name. If
|
|
the map input file does not exist (i.e. this is not for a map only job),
|
|
the given name is returned unchanged. If the config value for
|
|
"num.of.trailing.legs.to.use" is not set, or set 0 or negative, the given
|
|
name is returned unchanged. Otherwise, return a file name consisting of the
|
|
N trailing legs of the input file name where N is the config value for
|
|
"num.of.trailing.legs.to.use".
|
|
|
|
@param job
|
|
the job config
|
|
@param name
|
|
the output file name
|
|
@return the outfile name based on a given name and the input file name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@param fs
|
|
the file system to use
|
|
@param job
|
|
a job conf object
|
|
@param name
|
|
the name of the file over which a record writer object will be
|
|
constructed
|
|
@param arg3
|
|
a progressable object
|
|
@return A RecordWriter object over the given file
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This abstract class extends the FileOutputFormat, allowing to write the
|
|
output data to different output files. There are three basic use cases for
|
|
this class.
|
|
|
|
Case one: This class is used for a map reduce job with at least one reducer.
|
|
The reducer wants to write data to different files depending on the actual
|
|
keys. It is assumed that a key (or value) encodes the actual key (value)
|
|
and the desired location for the actual key (value).
|
|
|
|
Case two: This class is used for a map only job. The job wants to use an
|
|
output file name that is either a part of the input file name of the input
|
|
data, or some derivation of it.
|
|
|
|
Case three: This class is used for a map only job. The job wants to use an
|
|
output file name that depends on both the keys and the input file name.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.MultipleOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.MultipleOutputs -->
|
|
<class name="MultipleOutputs" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultipleOutputs" type="org.apache.hadoop.mapred.JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Creates and initializes multiple named outputs support, it should be
|
|
instantiated in the Mapper/Reducer configure method.
|
|
|
|
@param job the job configuration object]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getNamedOutputsList" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Returns list of channel names.
|
|
|
|
@param conf job conf
|
|
@return List of channel Names]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isMultiNamedOutput" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Returns whether a named output is a multi named output.
|
|
|
|
@param conf job conf
|
|
@param namedOutput named output
|
|
@return <code>true</code> if the named output is multi, <code>false</code>
|
|
if it is single. If the named output is not defined it returns
|
|
<code>false</code>]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNamedOutputFormatClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Returns the named output OutputFormat.
|
|
|
|
@param conf job conf
|
|
@param namedOutput named output
|
|
@return namedOutput OutputFormat]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNamedOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Returns the key class for a named output.
|
|
|
|
@param conf job conf
|
|
@param namedOutput named output
|
|
@return class for the named output key]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNamedOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Returns the value class for a named output.
|
|
|
|
@param conf job conf
|
|
@param namedOutput named output
|
|
@return class of named output value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addNamedOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<param name="outputFormatClass" type="java.lang.Class"/>
|
|
<param name="keyClass" type="java.lang.Class"/>
|
|
<param name="valueClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Adds a named output for the job.
|
|
<p/>
|
|
|
|
@param conf job conf to add the named output
|
|
@param namedOutput named output name, it has to be a word, letters
|
|
and numbers only, cannot be the word 'part' as
|
|
that is reserved for the
|
|
default output.
|
|
@param outputFormatClass OutputFormat class.
|
|
@param keyClass key class
|
|
@param valueClass value class]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addMultiNamedOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<param name="outputFormatClass" type="java.lang.Class"/>
|
|
<param name="keyClass" type="java.lang.Class"/>
|
|
<param name="valueClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Adds a multi named output for the job.
|
|
<p/>
|
|
|
|
@param conf job conf to add the named output
|
|
@param namedOutput named output name, it has to be a word, letters
|
|
and numbers only, cannot be the word 'part' as
|
|
that is reserved for the
|
|
default output.
|
|
@param outputFormatClass OutputFormat class.
|
|
@param keyClass key class
|
|
@param valueClass value class]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setCountersEnabled"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="enabled" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Enables or disables counters for the named outputs.
|
|
<p/>
|
|
By default these counters are disabled.
|
|
<p/>
|
|
MultipleOutputs supports counters, by default they are disabled.
|
|
The counters group is the {@link MultipleOutputs} class name.
|
|
<p/>
|
|
The names of the counters are the same as the named outputs. For multi
|
|
named outputs the name of the counter is the concatenation of the named
|
|
output, and underscore '_' and the multiname.
|
|
|
|
@param conf job conf in which to enable or disable the counters.
|
|
@param enabled indicates if the counters will be enabled or not.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCountersEnabled" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Returns if the counters for the named outputs are enabled or not.
|
|
<p/>
|
|
By default these counters are disabled.
|
|
<p/>
|
|
MultipleOutputs supports counters, by default they are disabled.
|
|
The counters group is the {@link MultipleOutputs} class name.
|
|
<p/>
|
|
The names of the counters are the same as the named outputs. For multi
|
|
named outputs the name of the counter is the concatenation of the named
|
|
output, and underscore '_' and the multiname.
|
|
|
|
|
|
@param conf job conf to read the counters setting from.
|
|
@return TRUE if the counters are enabled, FALSE if they are disabled.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNamedOutputs" return="java.util.Iterator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns iterator with the defined named outputs.
|
|
|
|
@return iterator with the defined named outputs]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCollector" return="org.apache.hadoop.mapred.OutputCollector"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets the output collector for a named output.
|
|
<p/>
|
|
|
|
@param namedOutput the named output name
|
|
@param reporter the reporter
|
|
@return the output collector for the given named output
|
|
@throws IOException thrown if output collector could not be created]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCollector" return="org.apache.hadoop.mapred.OutputCollector"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="namedOutput" type="java.lang.String"/>
|
|
<param name="multiName" type="java.lang.String"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets the output collector for a multi named output.
|
|
<p/>
|
|
|
|
@param namedOutput the named output name
|
|
@param multiName the multi name part
|
|
@param reporter the reporter
|
|
@return the output collector for the given named output
|
|
@throws IOException thrown if output collector could not be created]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Closes all the opened named outputs.
|
|
<p/>
|
|
If overridden, subclasses must invoke <code>super.close()</code> at the
|
|
end of their <code>close()</code>
|
|
|
|
@throws java.io.IOException thrown if any of the MultipleOutput files
|
|
could not be closed properly.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The MultipleOutputs class simplifies writing to additional outputs other
|
|
than the job default output via the <code>OutputCollector</code> passed to
|
|
the <code>map()</code> and <code>reduce()</code> methods of the
|
|
<code>Mapper</code> and <code>Reducer</code> implementations.
|
|
<p/>
|
|
Each additional output, or named output, may be configured with its own
|
|
<code>OutputFormat</code>, with its own key class and with its own value
|
|
class.
|
|
<p/>
|
|
A named output can be a single file or a multi file. The latter is referred to as
|
|
a multi named output.
|
|
<p/>
|
|
A multi named output is an unbound set of files all sharing the same
|
|
<code>OutputFormat</code>, key class and value class configuration.
|
|
<p/>
|
|
When named outputs are used within a <code>Mapper</code> implementation,
|
|
key/values written to a named output are not part of the reduce phase, only
|
|
key/values written to the job <code>OutputCollector</code> are part of the
|
|
reduce phase.
|
|
<p/>
|
|
MultipleOutputs supports counters, by default they are disabled. The counters
|
|
group is the {@link MultipleOutputs} class name.
|
|
<p/>
|
|
The names of the counters are the same as the named outputs. For multi
|
|
named outputs the name of the counter is the concatenation of the named
|
|
output, and underscore '_' and the multiname.
|
|
<p/>
|
|
Job configuration usage pattern is:
|
|
<pre>
|
|
|
|
JobConf conf = new JobConf();
|
|
|
|
conf.setInputPath(inDir);
|
|
FileOutputFormat.setOutputPath(conf, outDir);
|
|
|
|
conf.setMapperClass(MOMap.class);
|
|
conf.setReducerClass(MOReduce.class);
|
|
...
|
|
|
|
// Defines additional single text based output 'text' for the job
|
|
MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
|
|
LongWritable.class, Text.class);
|
|
|
|
// Defines additional multi sequencefile based output 'seq' for the
|
|
// job
|
|
MultipleOutputs.addMultiNamedOutput(conf, "seq",
|
|
SequenceFileOutputFormat.class,
|
|
LongWritable.class, Text.class);
|
|
...
|
|
|
|
JobClient jc = new JobClient();
|
|
RunningJob job = jc.submitJob(conf);
|
|
|
|
...
|
|
</pre>
|
|
<p/>
|
|
Usage pattern in a Reducer is:
|
|
<pre>
|
|
|
|
public class MOReduce implements
|
|
Reducer<WritableComparable, Writable> {
|
|
private MultipleOutputs mos;
|
|
|
|
public void configure(JobConf conf) {
|
|
...
|
|
mos = new MultipleOutputs(conf);
|
|
}
|
|
|
|
public void reduce(WritableComparable key, Iterator<Writable> values,
|
|
OutputCollector output, Reporter reporter)
|
|
throws IOException {
|
|
...
|
|
mos.getCollector("text", reporter).collect(key, new Text("Hello"));
|
|
mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye"));
|
|
mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau"));
|
|
...
|
|
}
|
|
|
|
public void close() throws IOException {
|
|
mos.close();
|
|
...
|
|
}
|
|
|
|
}
|
|
</pre>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.MultipleOutputs -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat -->
|
|
<class name="MultipleSequenceFileOutputFormat" extends="org.apache.hadoop.mapred.lib.MultipleOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultipleSequenceFileOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class extends the MultipleOutputFormat, allowing to write the output data
|
|
to different output files in sequence file output format.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.MultipleTextOutputFormat -->
|
|
<class name="MultipleTextOutputFormat" extends="org.apache.hadoop.mapred.lib.MultipleOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultipleTextOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class extends the MultipleOutputFormat, allowing to write the output
|
|
data to different output files in Text output format.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.MultipleTextOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.MultithreadedMapRunner -->
|
|
<class name="MultithreadedMapRunner" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.MapRunnable"/>
|
|
<constructor name="MultithreadedMapRunner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="input" type="org.apache.hadoop.mapred.RecordReader"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Multithreaded implementation for {@link org.apache.hadoop.mapred.MapRunnable}.
|
|
<p>
|
|
It can be used instead of the default implementation,
|
|
@link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU
|
|
bound in order to improve throughput.
|
|
<p>
|
|
Map implementations using this MapRunnable must be thread-safe.
|
|
<p>
|
|
The Map-Reduce job has to be configured to use this MapRunnable class (using
|
|
the JobConf.setMapRunnerClass method) and
|
|
the number of threads the thread-pool can use with the
|
|
<code>mapred.map.multithreadedrunner.threads</code> property, its default
|
|
value is 10 threads.
|
|
<p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.MultithreadedMapRunner -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.NLineInputFormat -->
|
|
<class name="NLineInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<constructor name="NLineInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="numSplits" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Logically splits the set of input files for the job, splits N lines
|
|
of the input as one split.
|
|
|
|
@see org.apache.hadoop.mapred.FileInputFormat#getSplits(JobConf, int)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[NLineInputFormat which splits N lines of input as one split.
|
|
|
|
In many "pleasantly" parallel applications, each process/mapper
|
|
processes the same input file(s), but with computations that are
|
|
controlled by different parameters. (Referred to as "parameter sweeps").
|
|
One way to achieve this, is to specify a set of parameters
|
|
(one set per line) as input in a control file
|
|
(which is the input path to the map-reduce application,
|
|
whereas the input dataset is specified
|
|
via a config variable in JobConf).
|
|
|
|
The NLineInputFormat can be used in such applications, that splits
|
|
the input file such that by default, one line is fed as
|
|
a value to one map task, and key is the offset.
|
|
i.e. (k,v) is (LongWritable, Text).
|
|
The location hints will span the whole mapred cluster.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.NLineInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.NullOutputFormat -->
|
|
<class name="NullOutputFormat" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link org.apache.hadoop.mapreduce.lib.output.NullOutputFormat} instead.">
|
|
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
|
|
<constructor name="NullOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Consume all outputs and put them in /dev/null.
|
|
@deprecated Use
|
|
{@link org.apache.hadoop.mapreduce.lib.output.NullOutputFormat} instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.NullOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.RegexMapper -->
|
|
<class name="RegexMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<constructor name="RegexMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Mapper} that extracts text matching a regular expression.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.RegexMapper -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.TokenCountMapper -->
|
|
<class name="TokenCountMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="Use
|
|
{@link org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper} instead.">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<constructor name="TokenCountMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Mapper} that maps text values into <token,freq> pairs. Uses
|
|
{@link StringTokenizer} to break text into tokens.
|
|
@deprecated Use
|
|
{@link org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper} instead.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.TokenCountMapper -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.TotalOrderPartitioner -->
|
|
<class name="TotalOrderPartitioner" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Partitioner"/>
|
|
<constructor name="TotalOrderPartitioner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Read in the partition file and build indexing data structures.
|
|
If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
|
|
<tt>total.order.partitioner.natural.order</tt> is not false, a trie
|
|
of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
|
|
will be built. Otherwise, keys will be located using a binary search of
|
|
the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
|
|
defined for this job. The input file must be sorted with the same
|
|
comparator and contain {@link
|
|
org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPartition" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<param name="numPartitions" type="int"/>
|
|
</method>
|
|
<method name="setPartitionFile"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="p" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Set the path to the SequenceFile storing the sorted partition keyset.
|
|
It must be the case that for <tt>R</tt> reduces, there are <tt>R-1</tt>
|
|
keys in the SequenceFile.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPartitionFile" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the path to the SequenceFile storing the sorted partition keyset.
|
|
@see #setPartitionFile(JobConf,Path)]]>
|
|
</doc>
|
|
</method>
|
|
<field name="DEFAULT_PATH" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[Partitioner effecting a total order by reading split points from
|
|
an externally generated source.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.TotalOrderPartitioner -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred.lib.aggregate">
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.DoubleValueSum -->
|
|
<class name="DoubleValueSum" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="DoubleValueSum"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
an object whose string representation represents a double value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="double"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
a double value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSum" return="double"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return an array of one element. The element is a string
|
|
representation of the aggregated value. The return value is
|
|
expected to be used by a combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that sums up a sequence of double
|
|
values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.DoubleValueSum -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueMax -->
|
|
<class name="LongValueMax" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="LongValueMax"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[the default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
an object whose string representation represents a long value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="newVal" type="long"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param newVal
|
|
a long value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getVal" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return an array of one element. The element is a string
|
|
representation of the aggregated value. The return value is
|
|
expected to be used by a combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that maintains the maximum of
|
|
a sequence of long values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueMax -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueMin -->
|
|
<class name="LongValueMin" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="LongValueMin"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[the default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
an object whose string representation represents a long value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="newVal" type="long"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param newVal
|
|
a long value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getVal" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return an array of one element. The element is a string
|
|
representation of the aggregated value. The return value is
|
|
expected to be used by a combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that maintains the minimum of
|
|
a sequence of long values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueMin -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueSum -->
|
|
<class name="LongValueSum" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="LongValueSum"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[the default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
an object whose string representation represents a long value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="long"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
a long value.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSum" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return an array of one element. The element is a string
|
|
representation of the aggregated value. The return value is
|
|
expected to be used by a combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that sums up
|
|
a sequence of long values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueSum -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.StringValueMax -->
|
|
<class name="StringValueMax" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="StringValueMax"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[the default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
a string.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getVal" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return an array of one element. The element is a string
|
|
representation of the aggregated value. The return value is
|
|
expected to be used by a combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that maintains the biggest of
|
|
a sequence of strings.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.StringValueMax -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.StringValueMin -->
|
|
<class name="StringValueMin" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="StringValueMin"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[the default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
a string.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getVal" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of the aggregated value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return an array of one element. The element is a string
|
|
representation of the aggregated value. The return value is
|
|
expected to be used by a combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that maintains the smallest of
|
|
a sequence of strings.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.StringValueMin -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.UniqValueCount -->
|
|
<class name="UniqValueCount" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="UniqValueCount"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[the default constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="UniqValueCount" type="long"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[constructor
|
|
@param maxNum the limit in the number of unique values to keep.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="setMaxItems" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="n" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the limit on the number of unique values
|
|
@param n the desired limit on the number of unique values
|
|
@return the new limit on the number of unique values]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val
|
|
an object.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return the number of unique objects aggregated]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUniqueItems" return="java.util.Set"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the set of the unique objects]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return return an array of the unique objects. The return value is
|
|
expected to be used by a combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that dedupes a sequence of objects.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.UniqValueCount -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.UserDefinedValueAggregatorDescriptor -->
|
|
<class name="UserDefinedValueAggregatorDescriptor" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor"/>
|
|
<constructor name="UserDefinedValueAggregatorDescriptor" type="java.lang.String, org.apache.hadoop.mapred.JobConf"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@param className the class name of the user defined descriptor class
|
|
@param job a configuration object used for descriptor configuration]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="createInstance" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="className" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Create an instance of the given class
|
|
@param className the name of the class
|
|
@return a dynamically created instance of the given class]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateKeyValPairs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Generate a list of aggregation-id/value pairs for the given key/value pairs
|
|
by delegating the invocation to the real object.
|
|
|
|
@param key
|
|
input key
|
|
@param val
|
|
input value
|
|
@return a list of aggregation id/value pairs. An aggregation id encodes an
|
|
aggregation type which is used to guide the way to aggregate the
|
|
value in the reduce/combiner phase of an Aggregate based job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of this object.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Do nothing.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a wrapper for a user defined value aggregator descriptor.
|
|
It serves two functions: One is to create an object of ValueAggregatorDescriptor from the
|
|
name of a user defined class that may be dynamically loaded. The other is to
|
|
delegate invocations of the generateKeyValPairs function to the created object.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.UserDefinedValueAggregatorDescriptor -->
|
|
<!-- start interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregator -->
|
|
<interface name="ValueAggregator" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add a value to the aggregator
|
|
|
|
@param val the value to be added]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return an array of values as the outputs of the combiner.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This interface defines the minimal protocol for value aggregators.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregator -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor -->
|
|
<class name="ValueAggregatorBaseDescriptor" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor"/>
|
|
<constructor name="ValueAggregatorBaseDescriptor"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="generateEntry" return="java.util.Map.Entry"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="type" type="java.lang.String"/>
|
|
<param name="id" type="java.lang.String"/>
|
|
<param name="val" type="org.apache.hadoop.io.Text"/>
|
|
<doc>
|
|
<![CDATA[@param type the aggregation type
|
|
@param id the aggregation id
|
|
@param val the val associated with the id to be aggregated
|
|
@return an Entry whose key is the aggregation id prefixed with
|
|
the aggregation type.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateValueAggregator" return="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="type" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[@param type the aggregation type
|
|
@return a value aggregator of the given type.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="generateKeyValPairs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Generate 1 or 2 aggregation-id/value pairs for the given key/value pair.
|
|
The first id will be of type LONG_VALUE_SUM, with "record_count" as
|
|
its aggregation id. If the input is a file split,
|
|
the second id of the same type will be generated too, with the file name
|
|
as its aggregation id. This achieves the behavior of counting the total number
|
|
of records in the input data, and the number of records in each input file.
|
|
|
|
@param key
|
|
input key
|
|
@param val
|
|
input value
|
|
@return a list of aggregation id/value pairs. An aggregation id encodes an
|
|
aggregation type which is used to guide the way to aggregate the
|
|
value in the reduce/combiner phase of an Aggregate based job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[get the input file name.
|
|
|
|
@param job a job configuration object]]>
|
|
</doc>
|
|
</method>
|
|
<field name="UNIQ_VALUE_COUNT" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="LONG_VALUE_SUM" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="DOUBLE_VALUE_SUM" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="VALUE_HISTOGRAM" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="LONG_VALUE_MAX" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="LONG_VALUE_MIN" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="STRING_VALUE_MAX" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="STRING_VALUE_MIN" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="inputFile" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This class implements the common functionalities of
|
|
the subclasses of ValueAggregatorDescriptor class.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorCombiner -->
|
|
<class name="ValueAggregatorCombiner" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ValueAggregatorCombiner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Combiner does not need to configure.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.Text"/>
|
|
<param name="values" type="java.util.Iterator"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Combines values for a given key.
|
|
@param key the key is expected to be a Text object, whose prefix indicates
|
|
the type of aggregation to aggregate the values.
|
|
@param values the values to combine
|
|
@param output to collect combined values]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Do nothing.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="arg0" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<param name="arg1" type="org.apache.hadoop.io.Writable"/>
|
|
<param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="arg3" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Do nothing. Should not be called.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements the generic combiner of Aggregate.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorCombiner -->
|
|
<!-- start interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor -->
|
|
<interface name="ValueAggregatorDescriptor" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="generateKeyValPairs" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[Generate a list of aggregation-id/value pairs for the given key/value pair.
|
|
This function is usually called by the mapper of an Aggregate based job.
|
|
|
|
@param key
|
|
input key
|
|
@param val
|
|
input value
|
|
@return a list of aggregation id/value pairs. An aggregation id encodes an
|
|
aggregation type which is used to guide the way to aggregate the
|
|
value in the reduce/combiner phase of an Aggregate based job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Configure the object
|
|
|
|
@param job
|
|
a JobConf object that may contain the information that can be used
|
|
to configure the object.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="TYPE_SEPARATOR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="ONE" type="org.apache.hadoop.io.Text"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This interface defines the contract a value aggregator descriptor must
|
|
support. Such a descriptor can be configured with a JobConf object. Its main
|
|
function is to generate a list of aggregation-id/value pairs. An aggregation
|
|
id encodes an aggregation type which is used to guide the way to aggregate
|
|
the value in the reduce/combiner phase of an Aggregate based job. The mapper in
|
|
an Aggregate based map/reduce job may create one or more of
|
|
ValueAggregatorDescriptor objects at configuration time. For each input
|
|
key/value pair, the mapper will use those objects to create aggregation
|
|
id/value pairs.]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob -->
|
|
<class name="ValueAggregatorJob" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ValueAggregatorJob"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="createValueAggregatorJobs" return="org.apache.hadoop.mapred.jobcontrol.JobControl"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<param name="descriptors" type="java.lang.Class[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="createValueAggregatorJobs" return="org.apache.hadoop.mapred.jobcontrol.JobControl"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create an Aggregate based map/reduce job.
|
|
|
|
@param args the arguments used for job creation. Generic hadoop
|
|
arguments are accepted.
|
|
@return a JobConf object ready for submission.
|
|
|
|
@throws IOException
|
|
@see GenericOptionsParser]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<param name="descriptors" type="java.lang.Class[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setAggregatorDescriptors"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="descriptors" type="java.lang.Class[]"/>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[create and run an Aggregate based map/reduce job.
|
|
|
|
@param args the arguments used for job creation
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This is the main class for creating a map/reduce job using Aggregate
|
|
framework. The Aggregate is a specialization of map/reduce framework,
|
|
specializing in performing various simple aggregations.
|
|
|
|
Generally speaking, in order to implement an application using Map/Reduce
|
|
model, the developer is to implement Map and Reduce functions (and possibly
|
|
combine function). However, a lot of applications related to counting and
|
|
statistics computing have very similar characteristics. Aggregate abstracts
|
|
out the general patterns of these functions and implements those patterns.
|
|
In particular, the package provides generic mapper/reducer/combiner classes,
|
|
and a set of built-in value aggregators, and a generic utility class that
|
|
helps user create map/reduce jobs using the generic class. The built-in
|
|
aggregators include:
|
|
|
|
sum over numeric values count the number of distinct values compute the
|
|
histogram of values compute the minimum, maximum, median, average, standard
|
|
deviation of numeric values
|
|
|
|
The developer using Aggregate will need only to provide a plugin class
|
|
conforming to the following interface:
|
|
|
|
public interface ValueAggregatorDescriptor { public ArrayList<Entry>
|
|
generateKeyValPairs(Object key, Object value); public void
|
|
configure(JobConf job); }
|
|
|
|
The package also provides a base class, ValueAggregatorBaseDescriptor,
|
|
implementing the above interface. The user can extend the base class and
|
|
implement generateKeyValPairs accordingly.
|
|
|
|
The primary work of generateKeyValPairs is to emit one or more key/value
|
|
pairs based on the input key/value pair. The key in an output key/value pair
|
|
encodes two pieces of information: aggregation type and aggregation id. The
|
|
value will be aggregated onto the aggregation id according to the aggregation
|
|
type.
|
|
|
|
This class offers a function to generate a map/reduce job using Aggregate
|
|
framework. The function takes the following parameters: input directory spec
|
|
input format (text or sequence file) output directory a file specifying the
|
|
user plugin class]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase -->
|
|
<class name="ValueAggregatorJobBase" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<implements name="org.apache.hadoop.mapred.Reducer"/>
|
|
<constructor name="ValueAggregatorJobBase"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="logSpec"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="aggregatorDescriptorList" type="java.util.ArrayList"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[This abstract class implements some common functionalities of the
|
|
generic mapper, reducer and combiner classes of Aggregate.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorMapper -->
|
|
<class name="ValueAggregatorMapper" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ValueAggregatorMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<param name="value" type="org.apache.hadoop.io.Writable"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[the map function. It iterates through the value aggregator descriptor
|
|
list to generate aggregation id/value pairs and emit them.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="arg0" type="org.apache.hadoop.io.Text"/>
|
|
<param name="arg1" type="java.util.Iterator"/>
|
|
<param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="arg3" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Do nothing. Should not be called.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements the generic mapper of Aggregate.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorMapper -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorReducer -->
|
|
<class name="ValueAggregatorReducer" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ValueAggregatorReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.Text"/>
|
|
<param name="values" type="java.util.Iterator"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@param key
|
|
the key is expected to be a Text object, whose prefix indicates
|
|
the type of aggregation to aggregate the values. In effect, data
|
|
driven computing is achieved. It is assumed that each aggregator's
|
|
getReport method emits appropriate output for the aggregator. This
|
|
may be further customized.
|
|
@param values the values to be aggregated]]>
|
|
</doc>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="arg0" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<param name="arg1" type="org.apache.hadoop.io.Writable"/>
|
|
<param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="arg3" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Do nothing. Should not be called]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements the generic reducer of Aggregate.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorReducer -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueHistogram -->
|
|
<class name="ValueHistogram" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
|
|
<constructor name="ValueHistogram"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="addNextValue"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="val" type="java.lang.Object"/>
|
|
<doc>
|
|
<![CDATA[add the given val to the aggregator.
|
|
|
|
@param val the value to be added. It is expected to be a string
|
|
in the form of xxxx\tnum, meaning xxxx has num occurrences.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReport" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return the string representation of this aggregator.
|
|
It includes the following basic statistics of the histogram:
|
|
the number of unique values
|
|
the minimum value
|
|
the median value
|
|
the maximum value
|
|
the average value
|
|
the standard deviation]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReportDetails" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return a string representation of the list of value/frequency pairs of
|
|
the histogram]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerOutput" return="java.util.ArrayList"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return a list of value/frequency pairs.
|
|
The return value is expected to be used by the reducer.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReportItems" return="java.util.TreeMap"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return a TreeMap representation of the histogram]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reset"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[reset the aggregator]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class implements a value aggregator that computes the
|
|
histogram of a sequence of strings.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueHistogram -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred.lib.db">
|
|
<!-- start class org.apache.hadoop.mapred.lib.db.DBConfiguration -->
|
|
<class name="DBConfiguration" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="configureDB"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="driverClass" type="java.lang.String"/>
|
|
<param name="dbUrl" type="java.lang.String"/>
|
|
<param name="userName" type="java.lang.String"/>
|
|
<param name="passwd" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Sets the DB access related fields in the JobConf.
|
|
@param job the job
|
|
@param driverClass JDBC Driver class name
|
|
@param dbUrl JDBC DB access URL.
|
|
@param userName DB access username
|
|
@param passwd DB access passwd]]>
|
|
</doc>
|
|
</method>
|
|
<method name="configureDB"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="driverClass" type="java.lang.String"/>
|
|
<param name="dbUrl" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Sets the DB access related fields in the JobConf.
|
|
@param job the job
|
|
@param driverClass JDBC Driver class name
|
|
@param dbUrl JDBC DB access URL.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="DRIVER_CLASS_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The JDBC Driver class name]]>
|
|
</doc>
|
|
</field>
|
|
<field name="URL_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[JDBC Database access URL]]>
|
|
</doc>
|
|
</field>
|
|
<field name="USERNAME_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[User name to access the database]]>
|
|
</doc>
|
|
</field>
|
|
<field name="PASSWORD_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Password to access the database]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_TABLE_NAME_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Input table name]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_FIELD_NAMES_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Field names in the Input table]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_CONDITIONS_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[WHERE clause in the input SELECT statement]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_ORDER_BY_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[ORDER BY clause in the input SELECT statement]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_QUERY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Whole input query, excluding LIMIT...OFFSET]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_COUNT_QUERY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Input query to get the count of records]]>
|
|
</doc>
|
|
</field>
|
|
<field name="INPUT_CLASS_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Class name implementing DBWritable which will hold input tuples]]>
|
|
</doc>
|
|
</field>
|
|
<field name="OUTPUT_TABLE_NAME_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Output table name]]>
|
|
</doc>
|
|
</field>
|
|
<field name="OUTPUT_FIELD_NAMES_PROPERTY" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Field names in the Output table]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A container for configuration property names for jobs with DB input/output.
|
|
<br>
|
|
The job can be configured using the static methods in this class,
|
|
{@link DBInputFormat}, and {@link DBOutputFormat}.
|
|
<p>
|
|
Alternatively, the properties can be set in the configuration with proper
|
|
values.
|
|
|
|
@see DBConfiguration#configureDB(JobConf, String, String, String, String)
|
|
@see DBInputFormat#setInput(JobConf, Class, String, String)
|
|
@see DBInputFormat#setInput(JobConf, Class, String, String, String, String...)
|
|
@see DBOutputFormat#setOutput(JobConf, String, String...)]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.db.DBConfiguration -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat -->
|
|
<class name="DBInputFormat" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.InputFormat"/>
|
|
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
|
|
<constructor name="DBInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="chunks" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCountQuery" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the query for getting the total number of rows,
|
|
subclasses can override this for custom behaviour.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="inputClass" type="java.lang.Class"/>
|
|
<param name="tableName" type="java.lang.String"/>
|
|
<param name="conditions" type="java.lang.String"/>
|
|
<param name="orderBy" type="java.lang.String"/>
|
|
<param name="fieldNames" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[Initializes the map-part of the job with the appropriate input settings.
|
|
|
|
@param job The job
|
|
@param inputClass the class object implementing DBWritable, which is the
|
|
Java object holding tuple fields.
|
|
@param tableName The table to read data from
|
|
@param conditions The condition with which to select data, e.g. '(updated >
|
|
20070101 AND length > 0)'
|
|
@param orderBy the fieldNames in the orderBy clause.
|
|
@param fieldNames The field names in the table
|
|
@see #setInput(JobConf, Class, String, String)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="inputClass" type="java.lang.Class"/>
|
|
<param name="inputQuery" type="java.lang.String"/>
|
|
<param name="inputCountQuery" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Initializes the map-part of the job with the appropriate input settings.
|
|
|
|
@param job The job
|
|
@param inputClass the class object implementing DBWritable, which is the
|
|
Java object holding tuple fields.
|
|
@param inputQuery the input query to select fields. Example :
|
|
"SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
|
|
@param inputCountQuery the input query that returns the number of records in
|
|
the table.
|
|
Example : "SELECT COUNT(f1) FROM Mytable"
|
|
@see #setInput(JobConf, Class, String, String, String, String...)]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An InputFormat that reads input data from an SQL table.
|
|
<p>
|
|
DBInputFormat emits LongWritables containing the record number as
|
|
key and DBWritables as value.
|
|
|
|
The SQL query, and input class can be set using one of the two
|
|
setInput methods.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit -->
|
|
<class name="DBInputFormat.DBInputSplit" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.InputSplit"/>
|
|
<constructor name="DBInputFormat.DBInputSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Default Constructor]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="DBInputFormat.DBInputSplit" type="long, long"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Convenience Constructor
|
|
@param start the index of the first row to select
|
|
@param end the index of the last row to select]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getStart" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return The index of the first row to select]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getEnd" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[@return The index of the last row to select]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[@return The total row count in this split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="input" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="output" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An InputSplit that spans a set of rows]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBRecordReader -->
|
|
<class name="DBInputFormat.DBRecordReader" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordReader"/>
|
|
<constructor name="DBInputFormat.DBRecordReader" type="org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.mapred.JobConf"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
<doc>
|
|
<![CDATA[@param split The InputSplit to read data for
|
|
@throws SQLException]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getSelectQuery" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the query for selecting the records,
|
|
subclasses can override this for custom behaviour.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createKey" return="org.apache.hadoop.io.LongWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createValue" return="org.apache.hadoop.mapred.lib.db.DBWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPos" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="next" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.LongWritable"/>
|
|
<param name="value" type="org.apache.hadoop.mapred.lib.db.DBWritable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A RecordReader that reads records from a SQL table.
|
|
Emits LongWritables containing the record number as
|
|
key and DBWritables as value.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat.NullDBWritable -->
|
|
<class name="DBInputFormat.NullDBWritable" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.lib.db.DBWritable"/>
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="DBInputFormat.NullDBWritable"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="arg0" type="java.sql.ResultSet"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="arg0" type="java.sql.PreparedStatement"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A Class that does nothing, implementing DBWritable]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat.NullDBWritable -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.db.DBOutputFormat -->
|
|
<class name="DBOutputFormat" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
|
|
<constructor name="DBOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="constructQuery" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="table" type="java.lang.String"/>
|
|
<param name="fieldNames" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[Constructs the query used as the prepared statement to insert data.
|
|
|
|
@param table
|
|
the table to insert into
|
|
@param fieldNames
|
|
the fields to insert into. If field names are unknown, supply an
|
|
array of nulls.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="filesystem" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="filesystem" type="org.apache.hadoop.fs.FileSystem"/>
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="tableName" type="java.lang.String"/>
|
|
<param name="fieldNames" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[Initializes the reduce-part of the job with the appropriate output settings
|
|
|
|
@param job
|
|
The job
|
|
@param tableName
|
|
The table to insert data into
|
|
@param fieldNames
|
|
The field names in the table. If unknown, supply the appropriate
|
|
number of nulls.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An OutputFormat that sends the reduce output to a SQL table.
|
|
<p>
|
|
{@link DBOutputFormat} accepts <key,value> pairs, where
|
|
key has a type extending DBWritable. Returned {@link RecordWriter}
|
|
writes <b>only the key</b> to the database with a batch SQL query.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.db.DBOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapred.lib.db.DBOutputFormat.DBRecordWriter -->
|
|
<class name="DBOutputFormat.DBRecordWriter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.RecordWriter"/>
|
|
<constructor name="DBOutputFormat.DBRecordWriter" type="java.sql.Connection, java.sql.PreparedStatement"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
</constructor>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.mapred.lib.db.DBWritable"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[{@inheritDoc}]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A RecordWriter that writes the reduce output to a SQL table]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.lib.db.DBOutputFormat.DBRecordWriter -->
|
|
<!-- start interface org.apache.hadoop.mapred.lib.db.DBWritable -->
|
|
<interface name="DBWritable" abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="statement" type="java.sql.PreparedStatement"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
<doc>
|
|
<![CDATA[Sets the fields of the object in the {@link PreparedStatement}.
|
|
@param statement the statement that the fields are put into.
|
|
@throws SQLException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="resultSet" type="java.sql.ResultSet"/>
|
|
<exception name="SQLException" type="java.sql.SQLException"/>
|
|
<doc>
|
|
<![CDATA[Reads the fields of the object from the {@link ResultSet}.
|
|
@param resultSet the {@link ResultSet} to get the fields from.
|
|
@throws SQLException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Objects that are read from/written to a database should implement
|
|
<code>DBWritable</code>. DBWritable is similar to {@link Writable}
|
|
except that the {@link #write(PreparedStatement)} method takes a
|
|
{@link PreparedStatement}, and {@link #readFields(ResultSet)}
|
|
takes a {@link ResultSet}.
|
|
<p>
|
|
Implementations are responsible for writing the fields of the object
|
|
to PreparedStatement, and reading the fields of the object from the
|
|
ResultSet.
|
|
|
|
<p>Example:</p>
|
|
If we have the following table in the database :
|
|
<pre>
|
|
CREATE TABLE MyTable (
|
|
counter INTEGER NOT NULL,
|
|
timestamp BIGINT NOT NULL
|
|
);
|
|
</pre>
|
|
then we can read/write the tuples from/to the table with :
|
|
<p><pre>
|
|
public class MyWritable implements Writable, DBWritable {
|
|
// Some data
|
|
private int counter;
|
|
private long timestamp;
|
|
|
|
//Writable#write() implementation
|
|
public void write(DataOutput out) throws IOException {
|
|
out.writeInt(counter);
|
|
out.writeLong(timestamp);
|
|
}
|
|
|
|
//Writable#readFields() implementation
|
|
public void readFields(DataInput in) throws IOException {
|
|
counter = in.readInt();
|
|
timestamp = in.readLong();
|
|
}
|
|
|
|
public void write(PreparedStatement statement) throws SQLException {
|
|
statement.setInt(1, counter);
|
|
statement.setLong(2, timestamp);
|
|
}
|
|
|
|
public void readFields(ResultSet resultSet) throws SQLException {
|
|
counter = resultSet.getInt(1);
|
|
timestamp = resultSet.getLong(2);
|
|
}
|
|
}
|
|
</pre></p>]]>
|
|
</doc>
|
|
</interface>
|
|
<!-- end interface org.apache.hadoop.mapred.lib.db.DBWritable -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred.pipes">
|
|
<!-- start class org.apache.hadoop.mapred.pipes.Submitter -->
|
|
<class name="Submitter" extends="org.apache.hadoop.conf.Configured"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="Submitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="Submitter" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getExecutable" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Get the URI of the application's executable.
|
|
@param conf
|
|
@return the URI where the application's executable is located]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setExecutable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="executable" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Set the URI for the application's executable. Normally this is a hdfs:
|
|
location.
|
|
@param conf
|
|
@param executable The URI of the application's executable.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setIsJavaRecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="value" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the job is using a Java RecordReader.
|
|
@param conf the configuration to modify
|
|
@param value the new value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getIsJavaRecordReader" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Check whether the job is using a Java RecordReader
|
|
@param conf the configuration to check
|
|
@return is it a Java RecordReader?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setIsJavaMapper"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="value" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the Mapper is written in Java.
|
|
@param conf the configuration to modify
|
|
@param value the new value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getIsJavaMapper" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Check whether the job is using a Java Mapper.
|
|
@param conf the configuration to check
|
|
@return is it a Java Mapper?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setIsJavaReducer"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="value" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the Reducer is written in Java.
|
|
@param conf the configuration to modify
|
|
@param value the new value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getIsJavaReducer" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Check whether the job is using a Java Reducer.
|
|
@param conf the configuration to check
|
|
@return is it a Java Reducer?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setIsJavaRecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="value" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the job will use a Java RecordWriter.
|
|
@param conf the configuration to modify
|
|
@param value the new value to set]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getIsJavaRecordWriter" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Will the reduce use a Java RecordWriter?
|
|
@param conf the configuration to check
|
|
@return true, if the output of the job will be written by Java]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getKeepCommandFile" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<doc>
|
|
<![CDATA[Does the user want to keep the command file for debugging? If this is
|
|
true, pipes will write a copy of the command data to a file in the
|
|
task directory named "downlink.data", which may be used to run the C++
|
|
program under the debugger. You probably also want to set
|
|
JobConf.setKeepFailedTaskFiles(true) to keep the entire directory from
|
|
being deleted.
|
|
To run using the data file, set the environment variable
|
|
"hadoop.pipes.command.file" to point to the file.
|
|
@param conf the configuration to check
|
|
@return will the framework save the command file?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setKeepCommandFile"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<param name="keep" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether to keep the command file for debugging
|
|
@param conf the configuration to modify
|
|
@param keep the new value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="submitJob" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="Use {@link Submitter#runJob(JobConf)}">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Submit a job to the map/reduce cluster. All of the necessary modifications
|
|
to the job to run under pipes are made to the configuration.
|
|
@param conf the job to submit to the cluster (MODIFIED)
|
|
@throws IOException
|
|
@deprecated Use {@link Submitter#runJob(JobConf)}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="runJob" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Submit a job to the map/reduce cluster. All of the necessary modifications
|
|
to the job to run under pipes are made to the configuration.
|
|
@param conf the job to submit to the cluster (MODIFIED)
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="jobSubmit" return="org.apache.hadoop.mapred.RunningJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Submit a job to the Map-Reduce framework.
|
|
This returns a handle to the {@link RunningJob} which can be used to track
|
|
the running-job.
|
|
|
|
@param conf the job configuration.
|
|
@return a handle to the {@link RunningJob} which can be used to track the
|
|
running-job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
<doc>
|
|
<![CDATA[Submit a pipes job based on the command line arguments.
|
|
@param args]]>
|
|
</doc>
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[The main entry point and job submitter. It may either be used as a command
|
|
line-based or API-based method to launch Pipes jobs.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.pipes.Submitter -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapred.tools">
|
|
<!-- start class org.apache.hadoop.mapred.tools.MRAdmin -->
|
|
<class name="MRAdmin" extends="org.apache.hadoop.conf.Configured"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="MRAdmin"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="MRAdmin" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Administrative access to Hadoop Map-Reduce.
|
|
|
|
Currently it only provides the ability to connect to the {@link JobTracker}
|
|
and refresh the service-level authorization policy.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapred.tools.MRAdmin -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce">
|
|
<!-- start class org.apache.hadoop.mapreduce.Counter -->
|
|
<class name="Counter" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="Counter"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="Counter" type="java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setDisplayName"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="displayName" type="java.lang.String"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read the binary representation of the counter]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Write the binary representation of the counter]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getDisplayName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the display name of the counter.
|
|
@return the user facing name of the counter]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getValue" return="long"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[What is the current value of this counter?
|
|
@return the current value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="increment"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="incr" type="long"/>
|
|
<doc>
|
|
<![CDATA[Increment this counter by the given value
|
|
@param incr the value to increase this counter by]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericRight" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A named counter that tracks the progress of a map/reduce job.
|
|
|
|
<p><code>Counters</code> represent global counters, defined either by the
|
|
Map-Reduce framework or applications. Each <code>Counter</code> is named by
|
|
an {@link Enum} and has a long for the value.</p>
|
|
|
|
<p><code>Counters</code> are bunched into Groups, each comprising
|
|
counters from a particular <code>Enum</code> class.</p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Counter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.CounterGroup -->
|
|
<class name="CounterGroup" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<implements name="java.lang.Iterable"/>
|
|
<constructor name="CounterGroup" type="java.lang.String"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="CounterGroup" type="java.lang.String, java.lang.String"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the internal name of the group
|
|
@return the internal name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDisplayName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the display name of the group.
|
|
@return the human readable name]]>
|
|
</doc>
|
|
</method>
|
|
<method name="findCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="counterName" type="java.lang.String"/>
|
|
<param name="displayName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Internal method to find a counter in a group.
|
|
@param counterName the name of the counter
|
|
@param displayName the display name of the counter
|
|
@return the counter that was found or added]]>
|
|
</doc>
|
|
</method>
|
|
<method name="findCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="counterName" type="java.lang.String"/>
|
|
</method>
|
|
<method name="iterator" return="java.util.Iterator"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="size" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the number of counters in this group.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericRight" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="incrAllCounters"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="rightGroup" type="org.apache.hadoop.mapreduce.CounterGroup"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A group of {@link Counter}s that logically belong together. Typically,
|
|
it is an {@link Enum} subclass and the counters are the values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.CounterGroup -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Counters -->
|
|
<class name="Counters" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<implements name="java.lang.Iterable"/>
|
|
<constructor name="Counters"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="findCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="groupName" type="java.lang.String"/>
|
|
<param name="counterName" type="java.lang.String"/>
|
|
</method>
|
|
<method name="findCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Enum"/>
|
|
<doc>
|
|
<![CDATA[Find the counter for the given enum. The same enum will always return the
|
|
same counter.
|
|
@param key the counter key
|
|
@return the matching counter object]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getGroupNames" return="java.util.Collection"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the names of all counter classes.
|
|
@return Collection of counter group names.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="iterator" return="java.util.Iterator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getGroup" return="org.apache.hadoop.mapreduce.CounterGroup"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="groupName" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Returns the named counter group, or an empty group if there is none
|
|
with the specified name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="countCounters" return="int"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the total number of counters, by summing the number of counters
|
|
in each group.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Write the set of groups.
|
|
The external format is:
|
|
#groups (groupName group)*
|
|
|
|
i.e. the number of groups followed by 0 or more groups, where each
|
|
group is of the form:
|
|
|
|
groupDisplayName #counters (false | true counter)*
|
|
|
|
where each counter is of the form:
|
|
|
|
name (false | true displayName) value]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Read a set of groups.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return textual representation of the counter values.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="incrAllCounters"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="other" type="org.apache.hadoop.mapreduce.Counters"/>
|
|
<doc>
|
|
<![CDATA[Increments multiple counters by their amounts in another Counters
|
|
instance.
|
|
@param other the other Counters instance]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericRight" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Counters -->
|
|
<!-- start class org.apache.hadoop.mapreduce.ID -->
|
|
<class name="ID" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.WritableComparable"/>
|
|
<constructor name="ID" type="int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[constructs an ID object from the given int]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="ID"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getId" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[returns the int which represents the identifier]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="that" type="org.apache.hadoop.mapreduce.ID"/>
|
|
<doc>
|
|
<![CDATA[Compare IDs by associated numbers]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="SEPARATOR" type="char"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="id" type="int"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A general identifier, which internally stores the id
|
|
as an integer. This is the super class of {@link JobID},
|
|
{@link TaskID} and {@link TaskAttemptID}.
|
|
|
|
@see JobID
|
|
@see TaskID
|
|
@see TaskAttemptID]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.ID -->
|
|
<!-- start class org.apache.hadoop.mapreduce.InputFormat -->
|
|
<class name="InputFormat" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getSplits" return="java.util.List"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Logically split the set of input files for the job.
|
|
|
|
<p>Each {@link InputSplit} is then assigned to an individual {@link Mapper}
|
|
for processing.</p>
|
|
|
|
<p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the
|
|
input files are not physically split into chunks. For example, a split could
|
|
be an <i>&lt;input-file-path, start, offset&gt;</i> tuple. The InputFormat
|
|
also creates the {@link RecordReader} to read the {@link InputSplit}.</p>
|
|
|
|
@param context job configuration.
|
|
@return a list of {@link InputSplit}s for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Create a record reader for a given split. The framework will call
|
|
{@link RecordReader#initialize(InputSplit, TaskAttemptContext)} before
|
|
the split is used.
|
|
@param split the split to be read
|
|
@param context the information about the task
|
|
@return a new record reader
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>InputFormat</code> describes the input-specification for a
|
|
Map-Reduce job.
|
|
|
|
<p>The Map-Reduce framework relies on the <code>InputFormat</code> of the
|
|
job to:</p>
|
|
<ol>
|
|
<li>
|
|
Validate the input-specification of the job.</li>
|
|
<li>
|
|
Split-up the input file(s) into logical {@link InputSplit}s, each of
|
|
which is then assigned to an individual {@link Mapper}.
|
|
</li>
|
|
<li>
|
|
Provide the {@link RecordReader} implementation to be used to glean
|
|
input records from the logical <code>InputSplit</code> for processing by
|
|
the {@link Mapper}.
|
|
</li>
|
|
</ol>
|
|
|
|
<p>The default behavior of file-based {@link InputFormat}s, typically
|
|
sub-classes of {@link FileInputFormat}, is to split the
|
|
input into <i>logical</i> {@link InputSplit}s based on the total size, in
|
|
bytes, of the input files. However, the {@link FileSystem} blocksize of
|
|
the input files is treated as an upper bound for input splits. A lower bound
|
|
on the split size can be set via
|
|
<a href="{@docRoot}/../mapred-default.html#mapred.min.split.size">
|
|
mapred.min.split.size</a>.</p>
|
|
|
|
<p>Clearly, logical splits based on input-size are insufficient for many
|
|
applications since record boundaries are to be respected. In such cases, the
|
|
application has to also implement a {@link RecordReader} on whom lies the
|
|
responsibility to respect record-boundaries and present a record-oriented
|
|
view of the logical <code>InputSplit</code> to the individual task.</p>
|
|
|
|
@see InputSplit
|
|
@see RecordReader
|
|
@see FileInputFormat]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.InputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.InputSplit -->
|
|
<class name="InputSplit" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InputSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getLength" return="long"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the size of the split, so that the input splits can be sorted by size.
|
|
@return the number of bytes in the split
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the list of nodes by name where the data for the split would be local.
|
|
The locations do not need to be serialized.
|
|
@return a new array of the node names.
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>InputSplit</code> represents the data to be processed by an
|
|
individual {@link Mapper}.
|
|
|
|
<p>Typically, it presents a byte-oriented view on the input and it is the
|
|
responsibility of {@link RecordReader} of the job to process this and present
|
|
a record-oriented view.</p>
|
|
|
|
@see InputFormat
|
|
@see RecordReader]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.InputSplit -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Job -->
|
|
<class name="Job" extends="org.apache.hadoop.mapreduce.JobContext"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Job"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<constructor name="Job" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<constructor name="Job" type="org.apache.hadoop.conf.Configuration, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="setNumReduceTasks"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="tasks" type="int"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the number of reduce tasks for the job.
|
|
@param tasks the number of reduce tasks
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setWorkingDirectory"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="dir" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Set the current working directory for the default file system.
|
|
|
|
@param dir the new current working directory.
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInputFormatClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link InputFormat} for the job.
|
|
@param cls the <code>InputFormat</code> to use
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputFormatClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link OutputFormat} for the job.
|
|
@param cls the <code>OutputFormat</code> to use
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapperClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Mapper} for the job.
|
|
@param cls the <code>Mapper</code> to use
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJarByClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the Jar by finding where a given class came from.
|
|
@param cls the example class]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJar" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the pathname of the job's jar.
|
|
@return the pathname]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setCombinerClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the combiner class for the job.
|
|
@param cls the combiner to use
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setReducerClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Reducer} for the job.
|
|
@param cls the <code>Reducer</code> to use
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setPartitionerClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Partitioner} for the job.
|
|
@param cls the <code>Partitioner</code> to use
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapOutputKeyClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the key class for the map output data. This allows the user to
|
|
specify the map output key class to be different than the final output
|
|
key class.
|
|
|
|
@param theClass the map output key class.
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapOutputValueClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the value class for the map output data. This allows the user to
|
|
specify the map output value class to be different than the final output
|
|
value class.
|
|
|
|
@param theClass the map output value class.
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputKeyClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the key class for the job output data.
|
|
|
|
@param theClass the key class for the job output data.
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputValueClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="theClass" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the value class for job outputs.
|
|
|
|
@param theClass the value class for job outputs.
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setSortComparatorClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Define the comparator that controls how the keys are sorted before they
|
|
are passed to the {@link Reducer}.
|
|
@param cls the raw comparator
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setGroupingComparatorClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Define the comparator that controls which keys are grouped together
|
|
for a single call to
|
|
{@link Reducer#reduce(Object, Iterable,
|
|
org.apache.hadoop.mapreduce.Reducer.Context)}
|
|
@param cls the raw comparator to use
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setJobName"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
|
|
<doc>
|
|
<![CDATA[Set the user-specified job name.
|
|
|
|
@param name the job's new name.
|
|
@throws IllegalStateException if the job is submitted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTrackingURL" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the URL where some job progress information will be displayed.
|
|
|
|
@return the URL where some job progress information will be displayed.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="mapProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the <i>progress</i> of the job's map-tasks, as a float between 0.0
|
|
and 1.0. When all map tasks have completed, the function returns 1.0.
|
|
|
|
@return the progress of the job's map-tasks.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reduceProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the <i>progress</i> of the job's reduce-tasks, as a float between 0.0
|
|
and 1.0. When all reduce tasks have completed, the function returns 1.0.
|
|
|
|
@return the progress of the job's reduce-tasks.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isComplete" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Check if the job is finished or not.
|
|
This is a non-blocking call.
|
|
|
|
@return <code>true</code> if the job is complete, else <code>false</code>.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isSuccessful" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Check if the job completed successfully.
|
|
|
|
@return <code>true</code> if the job succeeded, else <code>false</code>.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="killJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Kill the running job. Blocks until all job tasks have been
|
|
killed as well. If the job is no longer running, it simply returns.
|
|
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskCompletionEvents" return="org.apache.hadoop.mapred.TaskCompletionEvent[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="startFrom" type="int"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get events indicating completion (success/failure) of component tasks.
|
|
|
|
@param startFrom index to start fetching events from
|
|
@return an array of {@link TaskCompletionEvent}s
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="killTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Kill indicated task attempt.
|
|
|
|
@param taskId the id of the task to be terminated.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="failTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Fail indicated task attempt.
|
|
|
|
@param taskId the id of the task to be terminated.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounters" return="org.apache.hadoop.mapreduce.Counters"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Gets the counters for this job.
|
|
|
|
@return the counters for this job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="submit"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Submit the job to the cluster and return immediately.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="waitForCompletion" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="verbose" type="boolean"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Submit the job to the cluster and wait for it to finish.
|
|
@param verbose print the progress to the user
|
|
@return true if the job succeeded
|
|
@throws IOException thrown if the communication with the
|
|
<code>JobTracker</code> is lost]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The job submitter's view of the Job. It allows the user to configure the
|
|
job, submit it, control its execution, and query the state. The set methods
|
|
only work until the job is submitted, afterwards they will throw an
|
|
IllegalStateException.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Job -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Job.JobState -->
|
|
<class name="Job.JobState" extends="java.lang.Enum"
|
|
abstract="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="values" return="org.apache.hadoop.mapreduce.Job.JobState[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="valueOf" return="org.apache.hadoop.mapreduce.Job.JobState"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<field name="DEFINE" type="org.apache.hadoop.mapreduce.Job.JobState"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="RUNNING" type="org.apache.hadoop.mapreduce.Job.JobState"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Job.JobState -->
|
|
<!-- start class org.apache.hadoop.mapreduce.JobContext -->
|
|
<class name="JobContext" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="JobContext" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.JobID"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getConfiguration" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Return the configuration for the job.
|
|
@return the shared configuration object]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the unique ID for the job.
|
|
@return the object with the job id]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getNumReduceTasks" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the configured number of reduce tasks for this job. Defaults to
|
|
<code>1</code>.
|
|
@return the number of reduce tasks for this job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the current working directory for the default file system.
|
|
|
|
@return the directory name.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the key class for the job output data.
|
|
@return the key class for the job output data.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the value class for job outputs.
|
|
@return the value class for job outputs.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapOutputKeyClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the key class for the map output data. If it is not set, use the
|
|
(final) output key class. This allows the map output key class to be
|
|
different than the final output key class.
|
|
@return the map output key class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapOutputValueClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the value class for the map output data. If it is not set, use the
|
|
(final) output value class. This allows the map output value class to be
|
|
different than the final output value class.
|
|
|
|
@return the map output value class.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJobName" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user-specified job name. This is only used to identify the
|
|
job to the user.
|
|
|
|
@return the job's name, defaulting to "".]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputFormatClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link InputFormat} class for the job.
|
|
|
|
@return the {@link InputFormat} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapperClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Mapper} class for the job.
|
|
|
|
@return the {@link Mapper} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCombinerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Get the combiner class for the job.
|
|
|
|
@return the combiner class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getReducerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Reducer} class for the job.
|
|
|
|
@return the {@link Reducer} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputFormatClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link OutputFormat} class for the job.
|
|
|
|
@return the {@link OutputFormat} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPartitionerClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Partitioner} class for the job.
|
|
|
|
@return the {@link Partitioner} class for the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSortComparator" return="org.apache.hadoop.io.RawComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the {@link RawComparator} comparator used to compare keys.
|
|
|
|
@return the {@link RawComparator} comparator used to compare keys.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getJar" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the pathname of the job's jar.
|
|
@return the pathname]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getGroupingComparator" return="org.apache.hadoop.io.RawComparator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the user defined {@link RawComparator} comparator for
|
|
grouping keys of inputs to the reduce.
|
|
|
|
@return comparator set by the user for grouping values.
|
|
@see Job#setGroupingComparatorClass(Class) for details.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="INPUT_FORMAT_CLASS_ATTR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="MAP_CLASS_ATTR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="COMBINE_CLASS_ATTR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="REDUCE_CLASS_ATTR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="OUTPUT_FORMAT_CLASS_ATTR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="PARTITIONER_CLASS_ATTR" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="conf" type="org.apache.hadoop.mapred.JobConf"
|
|
transient="false" volatile="false"
|
|
static="false" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A read-only view of the job that is provided to the tasks while they
|
|
are running.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.JobContext -->
|
|
<!-- start class org.apache.hadoop.mapreduce.JobID -->
|
|
<class name="JobID" extends="org.apache.hadoop.mapred.ID"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.lang.Comparable"/>
|
|
<constructor name="JobID" type="java.lang.String, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a JobID object
|
|
@param jtIdentifier jobTracker identifier
|
|
@param id job number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="JobID"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getJtIdentifier" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="org.apache.hadoop.mapreduce.ID"/>
|
|
<doc>
|
|
<![CDATA[Compare JobIds by first jtIdentifiers, then by job numbers]]>
|
|
</doc>
|
|
</method>
|
|
<method name="appendTo" return="java.lang.StringBuilder"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="builder" type="java.lang.StringBuilder"/>
|
|
<doc>
|
|
<![CDATA[Add the stuff after the "job" prefix to the given builder. This is useful,
|
|
because the sub-ids use this substring at the start of their string.
|
|
@param builder the builder to append to
|
|
@return the builder that was passed in]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="forName" return="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="str" type="java.lang.String"/>
|
|
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
|
|
<doc>
|
|
<![CDATA[Construct a JobId object from given string
|
|
@return constructed JobId object or null if the given String is null
|
|
@throws IllegalArgumentException if the given string is malformed]]>
|
|
</doc>
|
|
</method>
|
|
<field name="JOB" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="idFormat" type="java.text.NumberFormat"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[JobID represents the immutable and unique identifier for
|
|
the job. JobID consists of two parts. First part
|
|
represents the jobtracker identifier, so that jobID to jobtracker map
|
|
is defined. For cluster setup this string is the jobtracker
|
|
start time, for local setting, it is "local".
|
|
Second part of the JobID is the job number. <br>
|
|
An example JobID is :
|
|
<code>job_200707121733_0003</code> , which represents the third job
|
|
running at the jobtracker started at <code>200707121733</code>.
|
|
<p>
|
|
Applications should never construct or parse JobID strings, but rather
|
|
use appropriate constructors or {@link #forName(String)} method.
|
|
|
|
@see TaskID
|
|
@see TaskAttemptID
|
|
@see org.apache.hadoop.mapred.JobTracker#getNewJobId()
|
|
@see org.apache.hadoop.mapred.JobTracker#getStartTime()]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.JobID -->
|
|
<!-- start class org.apache.hadoop.mapreduce.MapContext -->
|
|
<class name="MapContext" extends="org.apache.hadoop.mapreduce.TaskInputOutputContext"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MapContext" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.TaskAttemptID, org.apache.hadoop.mapreduce.RecordReader, org.apache.hadoop.mapreduce.RecordWriter, org.apache.hadoop.mapreduce.OutputCommitter, org.apache.hadoop.mapreduce.StatusReporter, org.apache.hadoop.mapreduce.InputSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getInputSplit" return="org.apache.hadoop.mapreduce.InputSplit"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the input split for this map.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentKey" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getCurrentValue" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The context that is given to the {@link Mapper}.
|
|
@param <KEYIN> the key input type to the Mapper
|
|
@param <VALUEIN> the value input type to the Mapper
|
|
@param <KEYOUT> the key output type from the Mapper
|
|
@param <VALUEOUT> the value output type from the Mapper]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.MapContext -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Mapper -->
|
|
<class name="Mapper" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Mapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Called once at the beginning of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Called once for each key/value pair in the input split. Most applications
|
|
should override this, but the default is the identity function.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Called once at the end of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Expert users can override this method for more complete control over the
|
|
execution of the Mapper.
|
|
@param context
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Maps input key/value pairs to a set of intermediate key/value pairs.
|
|
|
|
<p>Maps are the individual tasks which transform input records into
|
|
intermediate records. The transformed intermediate records need not be of
|
|
the same type as the input records. A given input pair may map to zero or
|
|
many output pairs.</p>
|
|
|
|
<p>The Hadoop Map-Reduce framework spawns one map task for each
|
|
{@link InputSplit} generated by the {@link InputFormat} for the job.
|
|
<code>Mapper</code> implementations can access the {@link Configuration} for
|
|
the job via the {@link JobContext#getConfiguration()}.
|
|
|
|
<p>The framework first calls
|
|
{@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by
|
|
{@link #map(Object, Object, Context)}
|
|
for each key/value pair in the <code>InputSplit</code>. Finally
|
|
{@link #cleanup(Context)} is called.</p>
|
|
|
|
<p>All intermediate values associated with a given output key are
|
|
subsequently grouped by the framework, and passed to a {@link Reducer} to
|
|
determine the final output. Users can control the sorting and grouping by
|
|
specifying two key {@link RawComparator} classes.</p>
|
|
|
|
<p>The <code>Mapper</code> outputs are partitioned per
|
|
<code>Reducer</code>. Users can control which keys (and hence records) go to
|
|
which <code>Reducer</code> by implementing a custom {@link Partitioner}.
|
|
|
|
<p>Users can optionally specify a <code>combiner</code>, via
|
|
{@link Job#setCombinerClass(Class)}, to perform local aggregation of the
|
|
intermediate outputs, which helps to cut down the amount of data transferred
|
|
from the <code>Mapper</code> to the <code>Reducer</code>.
|
|
|
|
<p>Applications can specify if and how the intermediate
|
|
outputs are to be compressed and which {@link CompressionCodec}s are to be
|
|
used via the <code>Configuration</code>.</p>
|
|
|
|
<p>If the job has zero
|
|
reduces then the output of the <code>Mapper</code> is directly written
|
|
to the {@link OutputFormat} without sorting by keys.</p>
|
|
|
|
<p>Example:</p>
|
|
<p><blockquote><pre>
|
|
public class TokenCounterMapper
|
|
extends Mapper<Object, Text, Text, IntWritable>{
|
|
|
|
private final static IntWritable one = new IntWritable(1);
|
|
private Text word = new Text();
|
|
|
|
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
|
|
StringTokenizer itr = new StringTokenizer(value.toString());
|
|
while (itr.hasMoreTokens()) {
|
|
word.set(itr.nextToken());
|
|
context.write(word, one);
|
|
}
|
|
}
|
|
}
|
|
</pre></blockquote></p>
|
|
|
|
<p>Applications may override the {@link #run(Context)} method to exert
|
|
greater control on map processing e.g. multi-threaded <code>Mapper</code>s
|
|
etc.</p>
|
|
|
|
@see InputFormat
|
|
@see JobContext
|
|
@see Partitioner
|
|
@see Reducer]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Mapper -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Mapper.Context -->
|
|
<class name="Mapper.Context" extends="org.apache.hadoop.mapreduce.MapContext"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Mapper.Context" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.TaskAttemptID, org.apache.hadoop.mapreduce.RecordReader, org.apache.hadoop.mapreduce.RecordWriter, org.apache.hadoop.mapreduce.OutputCommitter, org.apache.hadoop.mapreduce.StatusReporter, org.apache.hadoop.mapreduce.InputSplit"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</constructor>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Mapper.Context -->
|
|
<!-- start class org.apache.hadoop.mapreduce.OutputCommitter -->
|
|
<class name="OutputCommitter" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="OutputCommitter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setupJob"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For the framework to setup the job output during initialization
|
|
|
|
@param jobContext Context of the job whose output is being written.
|
|
@throws IOException if temporary output could not be created]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanupJob"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[For cleaning up the job's output after job completion
|
|
|
|
@param jobContext Context of the job whose output is being written.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupTask"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Sets up output for the task.
|
|
|
|
@param taskContext Context of the task whose output is being written.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="needsTaskCommit" return="boolean"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Check whether task needs a commit
|
|
|
|
@param taskContext
|
|
@return true/false
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="commitTask"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[To promote the task's temporary output to final output location
|
|
|
|
The task's output is moved to the job's output directory.
|
|
|
|
@param taskContext Context of the task whose output is being written.
|
|
@throws IOException if commit is not successful]]>
|
|
</doc>
|
|
</method>
|
|
<method name="abortTask"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Discard the task output
|
|
|
|
@param taskContext
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>OutputCommitter</code> describes the commit of task output for a
|
|
Map-Reduce job.
|
|
|
|
<p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of
|
|
the job to:</p>
|
|
<ol>
|
|
<li>
|
|
Setup the job during initialization. For example, create the temporary
|
|
output directory for the job during the initialization of the job.
|
|
</li>
|
|
<li>
|
|
Cleanup the job after the job completion. For example, remove the
|
|
temporary output directory after the job completion.
|
|
</li>
|
|
<li>
|
|
Setup the task temporary output.
|
|
</li>
|
|
<li>
|
|
Check whether a task needs a commit. This is to avoid the commit
|
|
procedure if a task does not need commit.
|
|
</li>
|
|
<li>
|
|
Commit of the task output.
|
|
</li>
|
|
<li>
|
|
Discard the task commit.
|
|
</li>
|
|
</ol>
|
|
|
|
@see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
|
|
@see JobContext
|
|
@see TaskAttemptContext]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.OutputCommitter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.OutputFormat -->
|
|
<class name="OutputFormat" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="OutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link RecordWriter} for the given task.
|
|
|
|
@param context the information about the current task.
|
|
@return a {@link RecordWriter} to write the output for the job.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Check for validity of the output-specification for the job.
|
|
|
|
<p>This is to validate the output specification for the job when it is
|
|
submitted. Typically checks that the output does not already exist,
|
|
throwing an exception when it already exists, so that output is not
|
|
overwritten.</p>
|
|
|
|
@param context information about the job
|
|
@throws IOException when output should not be attempted]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the output committer for this output format. This is responsible
|
|
for ensuring the output is committed correctly.
|
|
@param context the task context
|
|
@return an output committer
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>OutputFormat</code> describes the output-specification for a
|
|
Map-Reduce job.
|
|
|
|
<p>The Map-Reduce framework relies on the <code>OutputFormat</code> of the
|
|
job to:</p>
|
|
<ol>
|
|
<li>
|
|
Validate the output-specification of the job. For example, check that the
|
|
output directory doesn't already exist.</li>
|
|
<li>
|
|
Provide the {@link RecordWriter} implementation to be used to write out
|
|
the output files of the job. Output files are stored in a
|
|
{@link FileSystem}.
|
|
</li>
|
|
</ol>
|
|
|
|
@see RecordWriter]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.OutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Partitioner -->
|
|
<class name="Partitioner" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Partitioner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getPartition" return="int"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<param name="numPartitions" type="int"/>
|
|
<doc>
|
|
<![CDATA[Get the partition number for a given key (hence record) given the total
|
|
number of partitions i.e. number of reduce-tasks for the job.
|
|
|
|
<p>Typically a hash function on all or a subset of the key.</p>
|
|
|
|
@param key the key to be partitioned.
|
|
@param value the entry value.
|
|
@param numPartitions the total number of partitions.
|
|
@return the partition number for the <code>key</code>.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Partitions the key space.
|
|
|
|
<p><code>Partitioner</code> controls the partitioning of the keys of the
|
|
intermediate map-outputs. The key (or a subset of the key) is used to derive
|
|
the partition, typically by a hash function. The total number of partitions
|
|
is the same as the number of reduce tasks for the job. Hence this controls
|
|
which of the <code>m</code> reduce tasks the intermediate key (and hence the
|
|
record) is sent for reduction.</p>
|
|
|
|
@see Reducer]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Partitioner -->
|
|
<!-- start class org.apache.hadoop.mapreduce.RecordReader -->
|
|
<class name="RecordReader" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="java.io.Closeable"/>
|
|
<constructor name="RecordReader"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="initialize"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Called once at initialization.
|
|
@param split the split that defines the range of records to read
|
|
@param context the information about the task
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Read the next key, value pair.
|
|
@return true if a key/value pair was read
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentKey" return="java.lang.Object"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the current key
|
|
@return the current key or null if there is no current key
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentValue" return="java.lang.Object"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the current value.
|
|
@return the object that was read
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[The current progress of the record reader through its data.
|
|
@return a number between 0.0 and 1.0 that is the fraction of the data read
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Close the record reader.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The record reader breaks the data into key/value pairs for input to the
|
|
{@link Mapper}.
|
|
@param <KEYIN>
|
|
@param <VALUEIN>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.RecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.RecordWriter -->
|
|
<class name="RecordWriter" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="RecordWriter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="write"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Writes a key/value pair.
|
|
|
|
@param key the key to write.
|
|
@param value the value to write.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Close this <code>RecordWriter</code> to future operations.
|
|
|
|
@param context the context of the task
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[<code>RecordWriter</code> writes the output &lt;key, value&gt; pairs
|
|
to an output file.
|
|
|
|
<p><code>RecordWriter</code> implementations write the job outputs to the
|
|
{@link FileSystem}.</p>
|
|
|
|
@see OutputFormat]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.RecordWriter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.ReduceContext -->
|
|
<class name="ReduceContext" extends="org.apache.hadoop.mapreduce.TaskInputOutputContext"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="ReduceContext" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.TaskAttemptID, org.apache.hadoop.mapred.RawKeyValueIterator, org.apache.hadoop.mapreduce.Counter, org.apache.hadoop.mapreduce.RecordWriter, org.apache.hadoop.mapreduce.OutputCommitter, org.apache.hadoop.mapreduce.StatusReporter, org.apache.hadoop.io.RawComparator, java.lang.Class, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</constructor>
|
|
<method name="nextKey" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Start processing next unique key.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Advance to the next key/value pair.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentKey" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getCurrentValue" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getValues" return="java.lang.Iterable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Iterate through the values for the current key, reusing the same value
|
|
object, which is stored in the context.
|
|
@return the series of values associated with the current key. All of the
|
|
objects returned directly and indirectly from this method are reused.]]>
|
|
</doc>
|
|
</method>
|
|
<field name="reporter" type="org.apache.hadoop.util.Progressable"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[The context passed to the {@link Reducer}.
|
|
@param <KEYIN> the class of the input keys
|
|
@param <VALUEIN> the class of the input values
|
|
@param <KEYOUT> the class of the output keys
|
|
@param <VALUEOUT> the class of the output values]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.ReduceContext -->
|
|
<!-- start class org.apache.hadoop.mapreduce.ReduceContext.ValueIterable -->
|
|
<class name="ReduceContext.ValueIterable" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="java.lang.Iterable"/>
|
|
<constructor name="ReduceContext.ValueIterable"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="iterator" return="java.util.Iterator"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.ReduceContext.ValueIterable -->
|
|
<!-- start class org.apache.hadoop.mapreduce.ReduceContext.ValueIterator -->
|
|
<class name="ReduceContext.ValueIterator" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<implements name="java.util.Iterator"/>
|
|
<constructor name="ReduceContext.ValueIterator"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="hasNext" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="next" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="remove"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.ReduceContext.ValueIterator -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Reducer -->
|
|
<class name="Reducer" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Reducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Called once at the start of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="values" type="java.lang.Iterable"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[This method is called once for each key. Most applications will define
|
|
their reduce class by overriding this method. The default implementation
|
|
is an identity function.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanup"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Called once at the end of the task.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Advanced application writers can use the
|
|
{@link #run(org.apache.hadoop.mapreduce.Reducer.Context)} method to
|
|
control how the reduce task works.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Reduces a set of intermediate values which share a key to a smaller set of
|
|
values.
|
|
|
|
<p><code>Reducer</code> implementations
|
|
can access the {@link Configuration} for the job via the
|
|
{@link JobContext#getConfiguration()} method.</p>
|
|
|
|
<p><code>Reducer</code> has 3 primary phases:</p>
|
|
<ol>
|
|
<li>
|
|
|
|
<h4 id="Shuffle">Shuffle</h4>
|
|
|
|
<p>The <code>Reducer</code> copies the sorted output from each
|
|
{@link Mapper} using HTTP across the network.</p>
|
|
</li>
|
|
|
|
<li>
|
|
<h4 id="Sort">Sort</h4>
|
|
|
|
<p>The framework merge sorts <code>Reducer</code> inputs by
|
|
<code>key</code>s
|
|
(since different <code>Mapper</code>s may have output the same key).</p>
|
|
|
|
<p>The shuffle and sort phases occur simultaneously i.e. while outputs are
|
|
being fetched they are merged.</p>
|
|
|
|
<h5 id="SecondarySort">SecondarySort</h5>
|
|
|
|
<p>To achieve a secondary sort on the values returned by the value
|
|
iterator, the application should extend the key with the secondary
|
|
key and define a grouping comparator. The keys will be sorted using the
|
|
entire key, but will be grouped using the grouping comparator to decide
|
|
which keys and values are sent in the same call to reduce. The grouping
|
|
comparator is specified via
|
|
{@link Job#setGroupingComparatorClass(Class)}. The sort order is
|
|
controlled by
|
|
{@link Job#setSortComparatorClass(Class)}.</p>
|
|
|
|
|
|
For example, say that you want to find duplicate web pages and tag them
|
|
all with the url of the "best" known example. You would set up the job
|
|
like:
|
|
<ul>
|
|
<li>Map Input Key: url</li>
|
|
<li>Map Input Value: document</li>
|
|
<li>Map Output Key: document checksum, url pagerank</li>
|
|
<li>Map Output Value: url</li>
|
|
<li>Partitioner: by checksum</li>
|
|
<li>OutputKeyComparator: by checksum and then decreasing pagerank</li>
|
|
<li>OutputValueGroupingComparator: by checksum</li>
|
|
</ul>
|
|
</li>
|
|
|
|
<li>
|
|
<h4 id="Reduce">Reduce</h4>
|
|
|
|
<p>In this phase the
|
|
{@link #reduce(Object, Iterable, Context)}
|
|
method is called for each <code>&lt;key, (collection of values)&gt;</code> in
|
|
the sorted inputs.</p>
|
|
<p>The output of the reduce task is typically written to a
|
|
{@link RecordWriter} via
|
|
{@link Context#write(Object, Object)}.</p>
|
|
</li>
|
|
</ol>
|
|
|
|
<p>The output of the <code>Reducer</code> is <b>not re-sorted</b>.</p>
|
|
|
|
<p>Example:</p>
|
|
<p><blockquote><pre>
|
|
public class IntSumReducer&lt;Key&gt; extends Reducer&lt;Key,IntWritable,
|
|
Key,IntWritable> {
|
|
private IntWritable result = new IntWritable();
|
|
|
|
public void reduce(Key key, Iterable&lt;IntWritable&gt; values,
|
|
Context context) throws IOException {
|
|
int sum = 0;
|
|
for (IntWritable val : values) {
|
|
sum += val.get();
|
|
}
|
|
result.set(sum);
|
|
context.write(key, result);
|
|
}
|
|
}
|
|
</pre></blockquote></p>
|
|
|
|
@see Mapper
|
|
@see Partitioner]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Reducer -->
|
|
<!-- start class org.apache.hadoop.mapreduce.Reducer.Context -->
|
|
<class name="Reducer.Context" extends="org.apache.hadoop.mapreduce.ReduceContext"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Reducer.Context" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.TaskAttemptID, org.apache.hadoop.mapred.RawKeyValueIterator, org.apache.hadoop.mapreduce.Counter, org.apache.hadoop.mapreduce.RecordWriter, org.apache.hadoop.mapreduce.OutputCommitter, org.apache.hadoop.mapreduce.StatusReporter, org.apache.hadoop.io.RawComparator, java.lang.Class, java.lang.Class"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</constructor>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.Reducer.Context -->
|
|
<!-- start class org.apache.hadoop.mapreduce.StatusReporter -->
|
|
<class name="StatusReporter" extends="java.lang.Object"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="StatusReporter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="name" type="java.lang.Enum"/>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="group" type="java.lang.String"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
</method>
|
|
<method name="progress"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setStatus"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="status" type="java.lang.String"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.StatusReporter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.TaskAttemptContext -->
|
|
<class name="TaskAttemptContext" extends="org.apache.hadoop.mapreduce.JobContext"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Progressable"/>
|
|
<constructor name="TaskAttemptContext" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.TaskAttemptID"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getTaskAttemptID" return="org.apache.hadoop.mapreduce.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the unique name for this task attempt.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="msg" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Set the current status of the task to the given string.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getStatus" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the last set status message.
|
|
@return the current status message]]>
|
|
</doc>
|
|
</method>
|
|
<method name="progress"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Report progress. The subtypes actually do work in this method.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[The context for task attempts.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.TaskAttemptContext -->
|
|
<!-- start class org.apache.hadoop.mapreduce.TaskAttemptID -->
|
|
<class name="TaskAttemptID" extends="org.apache.hadoop.mapred.ID"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskAttemptID" type="org.apache.hadoop.mapreduce.TaskID, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskAttemptID object from given {@link TaskID}.
|
|
@param taskId TaskID that this task belongs to
|
|
@param id the task attempt number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskAttemptID" type="java.lang.String, int, boolean, int, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskAttemptID object from given parts.
|
|
@param jtIdentifier jobTracker identifier
|
|
@param jobId job number
|
|
@param isMap whether the tip is a map
|
|
@param taskId taskId number
|
|
@param id the task attempt number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskAttemptID"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the {@link JobID} object that this task attempt belongs to]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getTaskID" return="org.apache.hadoop.mapreduce.TaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the {@link TaskID} object that this task attempt belongs to]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isMap" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns whether this TaskAttemptID is a map ID]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="appendTo" return="java.lang.StringBuilder"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="builder" type="java.lang.StringBuilder"/>
|
|
<doc>
|
|
<![CDATA[Add the unique string to the StringBuilder
|
|
@param builder the builder to append to
|
|
@return the builder that was passed in.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="org.apache.hadoop.mapreduce.ID"/>
|
|
<doc>
|
|
<![CDATA[Compare TaskIds by first tipIds, then by task numbers.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="forName" return="org.apache.hadoop.mapreduce.TaskAttemptID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="str" type="java.lang.String"/>
|
|
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
|
|
<doc>
|
|
<![CDATA[Construct a TaskAttemptID object from given string
|
|
@return constructed TaskAttemptID object or null if the given String is null
|
|
@throws IllegalArgumentException if the given string is malformed]]>
|
|
</doc>
|
|
</method>
|
|
<field name="ATTEMPT" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[TaskAttemptID represents the immutable and unique identifier for
|
|
a task attempt. Each task attempt is one particular instance of a Map or
|
|
Reduce Task identified by its TaskID.
|
|
|
|
TaskAttemptID consists of 2 parts. First part is the
|
|
{@link TaskID}, that this TaskAttemptID belongs to.
|
|
Second part is the task attempt number. <br>
|
|
An example TaskAttemptID is :
|
|
<code>attempt_200707121733_0003_m_000005_0</code> , which represents the
|
|
zeroth task attempt for the fifth map task in the third job
|
|
running at the jobtracker started at <code>200707121733</code>.
|
|
<p>
|
|
Applications should never construct or parse TaskAttemptID strings
|
|
, but rather use appropriate constructors or {@link #forName(String)}
|
|
method.
|
|
|
|
@see JobID
|
|
@see TaskID]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.TaskAttemptID -->
|
|
<!-- start class org.apache.hadoop.mapreduce.TaskID -->
|
|
<class name="TaskID" extends="org.apache.hadoop.mapred.ID"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, boolean, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskID object from given {@link JobID}.
|
|
@param jobId JobID that this tip belongs to
|
|
@param isMap whether the tip is a map
|
|
@param id the tip number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskID" type="java.lang.String, int, boolean, int"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a TaskID object from given parts.
|
|
@param jtIdentifier jobTracker identifier
|
|
@param jobId job number
|
|
@param isMap whether the tip is a map
|
|
@param id the tip number]]>
|
|
</doc>
|
|
</constructor>
|
|
<constructor name="TaskID"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns the {@link JobID} object that this tip belongs to]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isMap" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Returns whether this TaskID is a map ID]]>
|
|
</doc>
|
|
</method>
|
|
<method name="equals" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="java.lang.Object"/>
|
|
</method>
|
|
<method name="compareTo" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="o" type="org.apache.hadoop.mapreduce.ID"/>
|
|
<doc>
|
|
<![CDATA[Compare TaskInProgressIds by first jobIds, then by tip numbers. Reduces are
|
|
defined as greater than maps.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="appendTo" return="java.lang.StringBuilder"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="builder" type="java.lang.StringBuilder"/>
|
|
<doc>
|
|
<![CDATA[Add the unique string to the given builder.
|
|
@param builder the builder to append to
|
|
@return the builder that was passed in]]>
|
|
</doc>
|
|
</method>
|
|
<method name="hashCode" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="forName" return="org.apache.hadoop.mapreduce.TaskID"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="str" type="java.lang.String"/>
|
|
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
|
|
<doc>
|
|
<![CDATA[Construct a TaskID object from given string
|
|
@return constructed TaskID object or null if the given String is null
|
|
@throws IllegalArgumentException if the given string is malformed]]>
|
|
</doc>
|
|
</method>
|
|
<field name="TASK" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<field name="idFormat" type="java.text.NumberFormat"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[TaskID represents the immutable and unique identifier for
|
|
a Map or Reduce Task. Each TaskID encompasses multiple attempts made to
|
|
execute the Map or Reduce Task, each of which is uniquely identified by
|
|
their TaskAttemptID.
|
|
|
|
TaskID consists of 3 parts. First part is the {@link JobID}, that this
|
|
TaskInProgress belongs to. Second part of the TaskID is either 'm' or 'r'
|
|
representing whether the task is a map task or a reduce task.
|
|
And the third part is the task number. <br>
|
|
An example TaskID is :
|
|
<code>task_200707121733_0003_m_000005</code> , which represents the
|
|
fifth map task in the third job running at the jobtracker
|
|
started at <code>200707121733</code>.
|
|
<p>
|
|
Applications should never construct or parse TaskID strings
|
|
, but rather use appropriate constructors or {@link #forName(String)}
|
|
method.
|
|
|
|
@see JobID
|
|
@see TaskAttemptID]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.TaskID -->
|
|
<!-- start class org.apache.hadoop.mapreduce.TaskInputOutputContext -->
|
|
<class name="TaskInputOutputContext" extends="org.apache.hadoop.mapreduce.TaskAttemptContext"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Progressable"/>
|
|
<constructor name="TaskInputOutputContext" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.TaskAttemptID, org.apache.hadoop.mapreduce.RecordWriter, org.apache.hadoop.mapreduce.OutputCommitter, org.apache.hadoop.mapreduce.StatusReporter"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Advance to the next key, value pair, returning false if at end.
|
|
@return true if a key, value pair was read, or false if no more]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentKey" return="java.lang.Object"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the current key.
|
|
@return the current key object or null if there isn't one
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCurrentValue" return="java.lang.Object"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the current value.
|
|
@return the value object that was read into
|
|
@throws IOException
|
|
@throws InterruptedException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Generate an output key/value pair.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="counterName" type="java.lang.Enum"/>
|
|
</method>
|
|
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="groupName" type="java.lang.String"/>
|
|
<param name="counterName" type="java.lang.String"/>
|
|
</method>
|
|
<method name="progress"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="setStatus"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="status" type="java.lang.String"/>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A context object that allows input and output from the task. It is only
|
|
supplied to the {@link Mapper} or {@link Reducer}.
|
|
@param <KEYIN> the input key type for the task
|
|
@param <VALUEIN> the input value type for the task
|
|
@param <KEYOUT> the output key type for the task
|
|
@param <VALUEOUT> the output value type for the task]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.TaskInputOutputContext -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.input">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.FileInputFormat -->
|
|
<class name="FileInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FileInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getFormatMinSplitSize" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the lower bound on split size imposed by the format.
|
|
@return the number of bytes of the minimal split for this format]]>
|
|
</doc>
|
|
</method>
|
|
<method name="isSplitable" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<param name="filename" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Is the given filename splitable? Usually, true, but if the file is
|
|
stream compressed, it will not be.
|
|
|
|
<code>FileInputFormat</code> implementations can override this and return
|
|
<code>false</code> to ensure that individual input files are never split-up
|
|
so that {@link Mapper}s process entire files.
|
|
|
|
@param context the job context
|
|
@param filename the file name to check
|
|
@return is this file splitable?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInputPathFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="filter" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job.
|
|
@param job the job to modify
|
|
@param filter the PathFilter class to use for filtering the input paths.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMinInputSplitSize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="size" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the minimum input split size
|
|
@param job the job to modify
|
|
@param size the minimum size]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMinSplitSize" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the minimum split size
|
|
@param job the job
|
|
@return the minimum number of bytes that can be in a split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMaxInputSplitSize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="size" type="long"/>
|
|
<doc>
|
|
<![CDATA[Set the maximum split size
|
|
@param job the job to modify
|
|
@param size the maximum split size]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMaxSplitSize" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the maximum split size.
|
|
@param context the job to look at.
|
|
@return the maximum number of bytes a split can include]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get a PathFilter instance of the filter set for the input paths.
|
|
|
|
@return the PathFilter instance set for the job, NULL if none has been set.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="listStatus" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[List input directories.
|
|
Subclasses may override to, e.g., select only files matching a regular
|
|
expression.
|
|
|
|
@param job the job to list input paths for
|
|
@return list of FileStatus objects
|
|
@throws IOException if zero items.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getSplits" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Generate the list of files and make them into FileSplits.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="computeSplitSize" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="blockSize" type="long"/>
|
|
<param name="minSize" type="long"/>
|
|
<param name="maxSize" type="long"/>
|
|
</method>
|
|
<method name="getBlockIndex" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/>
|
|
<param name="offset" type="long"/>
|
|
</method>
|
|
<method name="setInputPaths"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="commaSeparatedPaths" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Sets the given comma separated paths as the list of inputs
|
|
for the map-reduce job.
|
|
|
|
@param job the job
|
|
@param commaSeparatedPaths Comma separated paths to be set as
|
|
the list of inputs for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addInputPaths"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="commaSeparatedPaths" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add the given comma separated paths to the list of inputs for
|
|
the map-reduce job.
|
|
|
|
@param job The job to modify
|
|
@param commaSeparatedPaths Comma separated paths to be added to
|
|
the list of inputs for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setInputPaths"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Set the array of {@link Path}s as the list of inputs
|
|
for the map-reduce job.
|
|
|
|
@param job The job to modify
|
|
@param inputPaths the {@link Path}s of the input directories/files
|
|
for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="addInputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="path" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job.
|
|
|
|
@param job The {@link Job} to modify
|
|
@param path {@link Path} to be added to the list of inputs for
|
|
the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getInputPaths" return="org.apache.hadoop.fs.Path[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the list of input {@link Path}s for the map-reduce job.
|
|
|
|
@param context The job
|
|
@return the list of input {@link Path}s for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A base class for file-based {@link InputFormat}s.
|
|
|
|
<p><code>FileInputFormat</code> is the base class for all file-based
|
|
<code>InputFormat</code>s. This provides a generic implementation of
|
|
{@link #getSplits(JobContext)}.
|
|
Subclasses of <code>FileInputFormat</code> can also override the
|
|
{@link #isSplitable(JobContext, Path)} method to ensure input-files are
|
|
not split-up and are processed as a whole by {@link Mapper}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.FileInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.FileSplit -->
|
|
<class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.io.Writable"/>
|
|
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Constructs a split with host information
|
|
|
|
@param file the file name
|
|
@param start the position of the first byte in the file to process
|
|
@param length the number of bytes in the file to process
|
|
@param hosts the list of hosts containing the block, possibly null]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The file containing this split's data.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getStart" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The position of the first byte in the file to process.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getLength" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[The number of bytes in the file to process.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="toString" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="out" type="java.io.DataOutput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="readFields"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="in" type="java.io.DataInput"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getLocations" return="java.lang.String[]"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A section of an input file. Returned by {@link
|
|
InputFormat#getSplits(JobContext)} and passed to
|
|
{@link InputFormat#createRecordReader(InputSplit,TaskAttemptContext)}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.FileSplit -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.InvalidInputException -->
|
|
<class name="InvalidInputException" extends="java.io.IOException"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InvalidInputException" type="java.util.List"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Create the exception with the given list.
|
|
@param probs the list of problems to report. This list is not copied.]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="getProblems" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the complete list of the problems reported.
|
|
@return the list of problems, which must not be modified]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMessage" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get a summary message of the problems found.
|
|
@return the concatenated messages from all of the problems.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[This class wraps a list of problems with the input, so that the user
|
|
can get a list of problems together instead of finding and fixing them one
|
|
by one.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.InvalidInputException -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.LineRecordReader -->
|
|
<class name="LineRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="LineRecordReader"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="initialize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="genericSplit" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getCurrentKey" return="org.apache.hadoop.io.LongWritable"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getCurrentValue" return="org.apache.hadoop.io.Text"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Get the progress within the split]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Treats keys as offset in file and value as line.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.LineRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat -->
|
|
<class name="SequenceFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="getFormatMinSplitSize" return="long"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="listStatus" return="java.util.List"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link InputFormat} for {@link SequenceFile}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader -->
|
|
<class name="SequenceFileRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileRecordReader"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="initialize"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="nextKeyValue" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getCurrentKey" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getCurrentValue" return="java.lang.Object"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="getProgress" return="float"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Return the progress within the input split
|
|
@return 0.0 to 1.0 of the input byte range]]>
|
|
</doc>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="conf" type="org.apache.hadoop.conf.Configuration"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A {@link RecordReader} for {@link SequenceFile}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.input.TextInputFormat -->
|
|
<class name="TextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TextInputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
</method>
|
|
<method name="isSplitable" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<param name="file" type="org.apache.hadoop.fs.Path"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines.
|
|
Either linefeed or carriage-return is used to signal end of line. Keys are
|
|
the position in the file, and values are the line of text.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.input.TextInputFormat -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.map">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.map.InverseMapper -->
|
|
<class name="InverseMapper" extends="org.apache.hadoop.mapreduce.Mapper"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="InverseMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[The inverse function. Input keys and values are swapped.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Mapper} that swaps keys and values.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.map.InverseMapper -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper -->
|
|
<class name="MultithreadedMapper" extends="org.apache.hadoop.mapreduce.Mapper"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="MultithreadedMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getNumberOfThreads" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[The number of threads in the thread pool that will run the map function.
|
|
@param job the job
|
|
@return the number of threads]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setNumberOfThreads"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="threads" type="int"/>
|
|
<doc>
|
|
<![CDATA[Set the number of threads in the pool for running maps.
|
|
@param job the job to modify
|
|
@param threads the new number of threads]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getMapperClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the application's mapper class.
|
|
@param <K1> the map's input key type
|
|
@param <V1> the map's input value type
|
|
@param <K2> the map's output key type
|
|
@param <V2> the map's output value type
|
|
@param job the job
|
|
@return the mapper class to run]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setMapperClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="cls" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the application's mapper class.
|
|
@param <K1> the map input key type
|
|
@param <V1> the map input value type
|
|
@param <K2> the map output key type
|
|
@param <V2> the map output value type
|
|
@param job the job to modify
|
|
@param cls the class to use as the mapper]]>
|
|
</doc>
|
|
</method>
|
|
<method name="run"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Run the application's maps using a thread pool.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Multithreaded implementation for {@link org.apache.hadoop.mapreduce.Mapper}.
|
|
<p>
|
|
It can be used instead of the default implementation,
|
|
{@link org.apache.hadoop.mapred.MapRunner}, when the Map operation is not CPU
|
|
bound in order to improve throughput.
|
|
<p>
|
|
Mapper implementations using this MapRunnable must be thread-safe.
|
|
<p>
|
|
The Map-Reduce job has to be configured with the mapper to use via
|
|
{@link #setMapperClass(Job, Class)} and
|
|
the number of threads the thread-pool can use with the
|
|
{@link #setNumberOfThreads(Job, int)} method. The default
|
|
value is 10 threads.
|
|
<p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper -->
|
|
<class name="TokenCounterMapper" extends="org.apache.hadoop.mapreduce.Mapper"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TokenCounterMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Tokenize the input values and emit each word with a count of 1.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.output">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter -->
|
|
<class name="FileOutputCommitter" extends="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FileOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.TaskAttemptContext"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create a file output committer
|
|
@param outputPath the job's output path
|
|
@param context the task's context
|
|
@throws IOException]]>
|
|
</doc>
|
|
</constructor>
|
|
<method name="setupJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Create the temporary directory that is the root of all of the task
|
|
work directories.
|
|
@param context the job's context]]>
|
|
</doc>
|
|
</method>
|
|
<method name="cleanupJob"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Delete the temporary directory, including all of the work directories.
|
|
@param context the job's context]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setupTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[No task setup required.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="commitTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Move the files from the work directory to the job output directory
|
|
@param context the task context]]>
|
|
</doc>
|
|
</method>
|
|
<method name="abortTask"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<doc>
|
|
<![CDATA[Delete the work directory]]>
|
|
</doc>
|
|
</method>
|
|
<method name="needsTaskCommit" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Did this task write any files in the work directory?
|
|
@param context the task's context]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getWorkPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the directory that the task should write results into
|
|
@return the work directory
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<field name="TEMP_DIR_NAME" type="java.lang.String"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Temporary directory name]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[An {@link OutputCommitter} that commits files specified
|
|
in job output directory i.e. ${mapred.output.dir}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat -->
|
|
<class name="FileOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat"
|
|
abstract="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="FileOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setCompressOutput"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="compress" type="boolean"/>
|
|
<doc>
|
|
<![CDATA[Set whether the output of the job is compressed.
|
|
@param job the job to modify
|
|
@param compress should the output of the job be compressed?]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getCompressOutput" return="boolean"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Is the job output compressed?
|
|
@param job the Job to look in
|
|
@return <code>true</code> if the job output should be compressed,
|
|
<code>false</code> otherwise]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputCompressorClass"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="codecClass" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs.
|
|
@param job the job to modify
|
|
@param codecClass the {@link CompressionCodec} to be used to
|
|
compress the job outputs]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputCompressorClass" return="java.lang.Class"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<param name="defaultValue" type="java.lang.Class"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link CompressionCodec} for compressing the job outputs.
|
|
@param job the {@link Job} to look in
|
|
@param defaultValue the {@link CompressionCodec} to return if not set
|
|
@return the {@link CompressionCodec} to be used to compress the
|
|
job outputs
|
|
@throws IllegalArgumentException if the class was specified, but not found]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="true" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<exception name="FileAlreadyExistsException" type="org.apache.hadoop.mapred.FileAlreadyExistsException"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="setOutputPath"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="outputDir" type="org.apache.hadoop.fs.Path"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link Path} of the output directory for the map-reduce job.
|
|
|
|
@param job The job to modify
|
|
@param outputDir the {@link Path} of the output directory for
|
|
the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Path} to the output directory for the map-reduce job.
|
|
|
|
@return the {@link Path} to the output directory for the map-reduce job.
|
|
@see FileOutputFormat#getWorkOutputPath(TaskInputOutputContext)]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskInputOutputContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link Path} to the task's temporary output directory
|
|
for the map-reduce job
|
|
|
|
<h4 id="SideEffectFiles">Tasks' Side-Effect Files</h4>
|
|
|
|
<p>Some applications need to create/write-to side-files, which differ from
|
|
the actual job-outputs.</p>
|
|
|
|
<p>In such cases there could be issues with 2 instances of the same TIP
|
|
(running simultaneously e.g. speculative tasks) trying to open/write-to the
|
|
same file (path) on HDFS. Hence the application-writer will have to pick
|
|
unique names per task-attempt (e.g. using the attemptid, say
|
|
<tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p>
|
|
|
|
<p>To get around this the Map-Reduce framework helps the application-writer
|
|
out by maintaining a special
|
|
<tt>${mapred.output.dir}/_temporary/_${taskid}</tt>
|
|
sub-directory for each task-attempt on HDFS where the output of the
|
|
task-attempt goes. On successful completion of the task-attempt the files
|
|
in the <tt>${mapred.output.dir}/_temporary/_${taskid}</tt> (only)
|
|
are <i>promoted</i> to <tt>${mapred.output.dir}</tt>. Of course, the
|
|
framework discards the sub-directory of unsuccessful task-attempts. This
|
|
is completely transparent to the application.</p>
|
|
|
|
<p>The application-writer can take advantage of this by creating any
|
|
side-files required in a work directory during execution
|
|
of the task, i.e. via
|
|
{@link #getWorkOutputPath(TaskInputOutputContext)}, and
|
|
the framework will move them out similarly - thus the application-writer doesn't have to pick
|
|
unique paths per task-attempt.</p>
|
|
|
|
<p>The entire discussion holds true for maps of jobs with
|
|
reducer=NONE (i.e. 0 reduces) since output of the map, in that case,
|
|
goes directly to HDFS.</p>
|
|
|
|
@return the {@link Path} to the task's temporary output directory
|
|
for the map-reduce job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getPathForWorkFile" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskInputOutputContext"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="extension" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
<doc>
|
|
<![CDATA[Helper function to generate a {@link Path} for a file that is unique for
|
|
the task within the job output directory.
|
|
|
|
<p>The path can be used to create custom files from within the map and
|
|
reduce tasks. The path name will be unique for each task. The path parent
|
|
will be the job output directory.</p>
|
|
|
|
<p>This method uses the {@link #getUniqueFile} method to make the file name
|
|
unique for the task.</p>
|
|
|
|
@param context the context for the task.
|
|
@param name the name for the file.
|
|
@param extension the extension for the file
|
|
@return a unique path across all tasks of the job.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getUniqueFile" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<param name="name" type="java.lang.String"/>
|
|
<param name="extension" type="java.lang.String"/>
|
|
<doc>
|
|
<![CDATA[Generate a unique filename, based on the task id, name, and extension
|
|
@param context the task that is calling this
|
|
@param name the base filename
|
|
@param extension the filename extension
|
|
@return a string like $name-[mr]-$id$extension]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getDefaultWorkFile" return="org.apache.hadoop.fs.Path"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<param name="extension" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[Get the default path and filename for the output format.
|
|
@param context the task context
|
|
@param extension an extension to add to the filename
|
|
@return a full path $output/_temporary/$taskid/part-[mr]-$id
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A base class for {@link OutputFormat}s that write to {@link FileSystem}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.NullOutputFormat -->
|
|
<class name="NullOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="NullOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
</method>
|
|
<method name="checkOutputSpecs"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
</method>
|
|
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Consume all outputs and put them in /dev/null.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.NullOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat -->
|
|
<class name="SequenceFileOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="SequenceFileOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<method name="getOutputCompressionType" return="org.apache.hadoop.io.SequenceFile.CompressionType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
|
|
<doc>
|
|
<![CDATA[Get the {@link CompressionType} for the output {@link SequenceFile}.
|
|
@param job the {@link Job}
|
|
@return the {@link CompressionType} for the output {@link SequenceFile},
|
|
defaulting to {@link CompressionType#RECORD}]]>
|
|
</doc>
|
|
</method>
|
|
<method name="setOutputCompressionType"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
|
|
<param name="style" type="org.apache.hadoop.io.SequenceFile.CompressionType"/>
|
|
<doc>
|
|
<![CDATA[Set the {@link CompressionType} for the output {@link SequenceFile}.
|
|
@param job the {@link Job} to modify
|
|
@param style the {@link CompressionType} for the output
|
|
{@link SequenceFile}]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link OutputFormat} that writes {@link SequenceFile}s.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat -->
|
|
<class name="TextOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="TextOutputFormat"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An {@link OutputFormat} that writes plain text files.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.LineRecordWriter -->
|
|
<class name="TextOutputFormat.LineRecordWriter" extends="org.apache.hadoop.mapreduce.RecordWriter"
|
|
abstract="false"
|
|
static="true" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
<constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream, java.lang.String"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="write"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="close"
|
|
abstract="false" native="false" synchronized="true"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<field name="out" type="java.io.DataOutputStream"
|
|
transient="false" volatile="false"
|
|
static="false" final="false" visibility="protected"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.LineRecordWriter -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.partition">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner -->
|
|
<class name="HashPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="HashPartitioner"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="getPartition" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="value" type="java.lang.Object"/>
|
|
<param name="numReduceTasks" type="int"/>
|
|
<doc>
|
|
<![CDATA[Use {@link Object#hashCode()} to partition.]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Partition keys by their {@link Object#hashCode()}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner -->
|
|
</package>
|
|
<package name="org.apache.hadoop.mapreduce.lib.reduce">
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer -->
|
|
<class name="IntSumReducer" extends="org.apache.hadoop.mapreduce.Reducer"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="IntSumReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="values" type="java.lang.Iterable"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer -->
|
|
<!-- start class org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer -->
|
|
<class name="LongSumReducer" extends="org.apache.hadoop.mapreduce.Reducer"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="LongSumReducer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="reduce"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="java.lang.Object"/>
|
|
<param name="values" type="java.lang.Iterable"/>
|
|
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
|
|
</method>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer -->
|
|
</package>
|
|
<package name="org.apache.hadoop.tools">
|
|
<!-- start class org.apache.hadoop.tools.DistCh -->
|
|
<class name="DistCh" extends="org.apache.hadoop.tools.DistTool"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
<doc>
|
|
<![CDATA[This is the main driver for recursively changing file properties.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A Map-reduce program to recursively change file properties
|
|
such as owner, group and permission.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.DistCh -->
|
|
<!-- start class org.apache.hadoop.tools.DistCp -->
|
|
<class name="DistCp" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="DistCp" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="copy"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
<param name="srcPath" type="java.lang.String"/>
|
|
<param name="destPath" type="java.lang.String"/>
|
|
<param name="logPath" type="org.apache.hadoop.fs.Path"/>
|
|
<param name="srcAsList" type="boolean"/>
|
|
<param name="ignoreReadFailures" type="boolean"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[This is the main driver for recursively copying directories
|
|
across file systems. It takes at least two cmdline parameters. A source
|
|
URL and a destination URL. It then essentially does an "ls -lR" on the
|
|
source URL, and writes the output in a round-robin manner to all the map
|
|
input files. The mapper actually copies the files allotted to it. The
|
|
reduce is empty.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
</method>
|
|
<method name="getRandomId" return="java.lang.String"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<field name="LOG" type="org.apache.commons.logging.Log"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
</field>
|
|
<doc>
|
|
<![CDATA[A Map-reduce program to recursively copy directories between
|
|
different file-systems.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.DistCp -->
|
|
<!-- start class org.apache.hadoop.tools.DistCp.DuplicationException -->
|
|
<class name="DistCp.DuplicationException" extends="java.io.IOException"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<field name="ERROR_CODE" type="int"
|
|
transient="false" volatile="false"
|
|
static="true" final="true" visibility="public"
|
|
deprecated="not deprecated">
|
|
<doc>
|
|
<![CDATA[Error code for this exception]]>
|
|
</doc>
|
|
</field>
|
|
<doc>
|
|
<![CDATA[An exception class for duplicated source files.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.DistCp.DuplicationException -->
|
|
<!-- start class org.apache.hadoop.tools.HadoopArchives -->
|
|
<class name="HadoopArchives" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.util.Tool"/>
|
|
<constructor name="HadoopArchives" type="org.apache.hadoop.conf.Configuration"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="archive"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="srcPaths" type="java.util.List"/>
|
|
<param name="archiveName" type="java.lang.String"/>
|
|
<param name="dest" type="org.apache.hadoop.fs.Path"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[archive the given source paths into
|
|
the dest
|
|
@param srcPaths the src paths to be archived
|
|
@param dest the dest dir that will contain the archive]]>
|
|
</doc>
|
|
</method>
|
|
<method name="run" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<exception name="Exception" type="java.lang.Exception"/>
|
|
<doc>
|
|
<![CDATA[the main driver for creating the archives
|
|
it takes at least two command line parameters. The src and the
|
|
dest. It does an lsr on the source paths.
|
|
The mapper creates archives and the reducer creates
|
|
the archive index.]]>
|
|
</doc>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
<doc>
|
|
<![CDATA[the main functions]]>
|
|
</doc>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[An archive creation utility.
|
|
This class provides methods that can be used
|
|
to create hadoop archives. For understanding of
|
|
Hadoop archives look at {@link HarFileSystem}.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.HadoopArchives -->
|
|
<!-- start class org.apache.hadoop.tools.Logalyzer -->
|
|
<class name="Logalyzer" extends="java.lang.Object"
|
|
abstract="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<constructor name="Logalyzer"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="doArchive"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="logListURI" type="java.lang.String"/>
|
|
<param name="archiveDirectory" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[doArchive: Workhorse function to archive log-files.
|
|
@param logListURI : The uri which will serve list of log-files to archive.
|
|
@param archiveDirectory : The directory to store archived logfiles.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="doAnalyze"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="inputFilesDirectory" type="java.lang.String"/>
|
|
<param name="outputDirectory" type="java.lang.String"/>
|
|
<param name="grepPattern" type="java.lang.String"/>
|
|
<param name="sortColumns" type="java.lang.String"/>
|
|
<param name="columnSeparator" type="java.lang.String"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
<doc>
|
|
<![CDATA[doAnalyze:
|
|
@param inputFilesDirectory : Directory containing the files to be analyzed.
|
|
@param outputDirectory : Directory to store analysis (output).
|
|
@param grepPattern : Pattern to *grep* for.
|
|
@param sortColumns : Sort specification for output.
|
|
@param columnSeparator : Column separator.
|
|
@throws IOException]]>
|
|
</doc>
|
|
</method>
|
|
<method name="main"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="args" type="java.lang.String[]"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[Logalyzer: A utility tool for archiving and analyzing hadoop logs.
|
|
<p>
|
|
This tool supports archiving and analyzing (sort/grep) of log-files.
|
|
It takes as input
|
|
a) Input uri which will serve uris of the logs to be archived.
|
|
b) Output directory (not mandatory).
|
|
c) Directory on dfs to archive the logs.
|
|
d) The sort/grep patterns for analyzing the files and separator for boundaries.
|
|
Usage:
|
|
Logalyzer -archive -archiveDir <directory to archive logs> -analysis <directory> -logs <log-list uri> -grep <pattern> -sort <col1, col2> -separator <separator>
|
|
<p>]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.Logalyzer -->
|
|
<!-- start class org.apache.hadoop.tools.Logalyzer.LogComparator -->
|
|
<class name="Logalyzer.LogComparator" extends="org.apache.hadoop.io.Text.Comparator"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.conf.Configurable"/>
|
|
<constructor name="Logalyzer.LogComparator"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="setConf"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
|
|
</method>
|
|
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</method>
|
|
<method name="compare" return="int"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="b1" type="byte[]"/>
|
|
<param name="s1" type="int"/>
|
|
<param name="l1" type="int"/>
|
|
<param name="b2" type="byte[]"/>
|
|
<param name="s2" type="int"/>
|
|
<param name="l2" type="int"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A WritableComparator optimized for UTF8 keys of the logs.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.Logalyzer.LogComparator -->
|
|
<!-- start class org.apache.hadoop.tools.Logalyzer.LogRegexMapper -->
|
|
<class name="Logalyzer.LogRegexMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
|
|
abstract="false"
|
|
static="true" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<implements name="org.apache.hadoop.mapred.Mapper"/>
|
|
<constructor name="Logalyzer.LogRegexMapper"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
</constructor>
|
|
<method name="configure"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
|
|
</method>
|
|
<method name="map"
|
|
abstract="false" native="false" synchronized="false"
|
|
static="false" final="false" visibility="public"
|
|
deprecated="not deprecated">
|
|
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
|
|
<param name="value" type="org.apache.hadoop.io.Text"/>
|
|
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
|
|
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
|
|
<exception name="IOException" type="java.io.IOException"/>
|
|
</method>
|
|
<doc>
|
|
<![CDATA[A {@link Mapper} that extracts text matching a regular expression.]]>
|
|
</doc>
|
|
</class>
|
|
<!-- end class org.apache.hadoop.tools.Logalyzer.LogRegexMapper -->
|
|
</package>
|
|
|
|
</api>
|