Merge r1555021 through r1561943 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-5535@1561944 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tsz-wo Sze 2014-01-28 05:37:13 +00:00
commit efcdf81ca9
38 changed files with 1367 additions and 682 deletions

View File

@ -24,8 +24,7 @@ Configuration
* Server Side Configuration Setup * Server Side Configuration Setup
The {{{./apidocs/org/apache/hadoop/auth/server/AuthenticationFilter.html} The AuthenticationFilter filter is Hadoop Auth's server side component.
AuthenticationFilter filter}} is Hadoop Auth's server side component.
This filter must be configured in front of all the web application resources This filter must be configured in front of all the web application resources
that required authenticated requests. For example: that required authenticated requests. For example:
@ -46,9 +45,7 @@ Configuration
must start with the prefix. The default value is no prefix. must start with the prefix. The default value is no prefix.
* <<<[PREFIX.]type>>>: the authentication type keyword (<<<simple>>> or * <<<[PREFIX.]type>>>: the authentication type keyword (<<<simple>>> or
<<<kerberos>>>) or a <<<kerberos>>>) or a Authentication handler implementation.
{{{./apidocs/org/apache/hadoop/auth/server/AuthenticationHandler.html}
Authentication handler implementation}}.
* <<<[PREFIX.]signature.secret>>>: The secret to SHA-sign the generated * <<<[PREFIX.]signature.secret>>>: The secret to SHA-sign the generated
authentication tokens. If a secret is not provided a random secret is authentication tokens. If a secret is not provided a random secret is

View File

@ -52,7 +52,3 @@ Hadoop Auth, Java HTTP SPNEGO ${project.version}
* {{{./BuildingIt.html}Building It}} * {{{./BuildingIt.html}Building It}}
* {{{./apidocs/index.html}JavaDocs}}
* {{{./dependencies.html}Dependencies}}

View File

@ -536,6 +536,15 @@ Release 2.4.0 - UNRELEASED
HADOOP-10252. HttpServer can't start if hostname is not specified. (Jimmy HADOOP-10252. HttpServer can't start if hostname is not specified. (Jimmy
Xiang via atm) Xiang via atm)
HADOOP-10203. Connection leak in
Jets3tNativeFileSystemStore#retrieveMetadata. (Andrei Savu via atm)
HADOOP-10250. VersionUtil returns wrong value when comparing two versions.
(Yongjun Zhang via atm)
HADOOP-10288. Explicit reference to Log4JLogger breaks non-log4j users
(todd)
Release 2.3.0 - UNRELEASED Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES
@ -559,6 +568,12 @@ Release 2.3.0 - UNRELEASED
HADOOP-10248. Property name should be included in the exception where property value HADOOP-10248. Property name should be included in the exception where property value
is null (Akira AJISAKA via umamahesh) is null (Akira AJISAKA via umamahesh)
HADOOP-10086. User document for authentication in secure cluster.
(Masatake Iwasaki via Arpit Agarwal)
HADOOP-10274 Lower the logging level from ERROR to WARN for UGI.doAs method
(Takeshi Miao via stack)
OPTIMIZATIONS OPTIMIZATIONS
HADOOP-10142. Avoid groups lookup for unprivileged users such as "dr.who" HADOOP-10142. Avoid groups lookup for unprivileged users such as "dr.who"
@ -637,6 +652,12 @@ Release 2.3.0 - UNRELEASED
HADOOP-10167. Mark hadoop-common source as UTF-8 in Maven pom files / refactoring HADOOP-10167. Mark hadoop-common source as UTF-8 in Maven pom files / refactoring
(Mikhail Antonov via cos) (Mikhail Antonov via cos)
HADOOP-9982. Fix dead links in hadoop site docs. (Akira Ajisaka via Arpit
Agarwal)
HADOOP-10212. Incorrect compile command in Native Library document.
(Akira Ajisaka via Arpit Agarwal)
Release 2.2.0 - 2013-10-13 Release 2.2.0 - 2013-10-13
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -364,4 +364,11 @@
<Bug pattern="OBL_UNSATISFIED_OBLIGATION"/> <Bug pattern="OBL_UNSATISFIED_OBLIGATION"/>
</Match> </Match>
<!-- code from maven source, null value is checked at callee side. -->
<Match>
<Class name="org.apache.hadoop.util.ComparableVersion$ListItem" />
<Method name="compareTo" />
<Bug code="NP" />
</Match>
</FindBugsFilter> </FindBugsFilter>

View File

@ -110,23 +110,29 @@ class Jets3tNativeFileSystemStore implements NativeFileSystemStore {
handleS3ServiceException(e); handleS3ServiceException(e);
} }
} }
@Override @Override
public FileMetadata retrieveMetadata(String key) throws IOException { public FileMetadata retrieveMetadata(String key) throws IOException {
StorageObject object = null;
try { try {
if(LOG.isDebugEnabled()) { if(LOG.isDebugEnabled()) {
LOG.debug("Getting metadata for key: " + key + " from bucket:" + bucket.getName()); LOG.debug("Getting metadata for key: " + key + " from bucket:" + bucket.getName());
} }
S3Object object = s3Service.getObject(bucket.getName(), key); object = s3Service.getObjectDetails(bucket.getName(), key);
return new FileMetadata(key, object.getContentLength(), return new FileMetadata(key, object.getContentLength(),
object.getLastModifiedDate().getTime()); object.getLastModifiedDate().getTime());
} catch (S3ServiceException e) {
} catch (ServiceException e) {
// Following is brittle. Is there a better way? // Following is brittle. Is there a better way?
if (e.getS3ErrorCode().matches("NoSuchKey")) { if ("NoSuchKey".equals(e.getErrorCode())) {
return null; //return null if key not found return null; //return null if key not found
} }
handleS3ServiceException(e); handleServiceException(e);
return null; //never returned - keep compiler happy return null; //never returned - keep compiler happy
} finally {
if (object != null) {
object.closeDataInputStream();
}
} }
} }

View File

@ -53,7 +53,17 @@ public class HttpRequestLog {
String appenderName = name + "requestlog"; String appenderName = name + "requestlog";
Log logger = LogFactory.getLog(loggerName); Log logger = LogFactory.getLog(loggerName);
if (logger instanceof Log4JLogger) { boolean isLog4JLogger;;
try {
isLog4JLogger = logger instanceof Log4JLogger;
} catch (NoClassDefFoundError err) {
// In some dependent projects, log4j may not even be on the classpath at
// runtime, in which case the above instanceof check will throw
// NoClassDefFoundError.
LOG.debug("Could not load Log4JLogger class", err);
isLog4JLogger = false;
}
if (isLog4JLogger) {
Log4JLogger httpLog4JLog = (Log4JLogger)logger; Log4JLogger httpLog4JLog = (Log4JLogger)logger;
Logger httpLogger = httpLog4JLog.getLogger(); Logger httpLogger = httpLog4JLog.getLogger();
Appender appender = null; Appender appender = null;

View File

@ -1560,7 +1560,7 @@ public class UserGroupInformation {
return Subject.doAs(subject, action); return Subject.doAs(subject, action);
} catch (PrivilegedActionException pae) { } catch (PrivilegedActionException pae) {
Throwable cause = pae.getCause(); Throwable cause = pae.getCause();
LOG.error("PriviledgedActionException as:"+this+" cause:"+cause); LOG.warn("PriviledgedActionException as:"+this+" cause:"+cause);
if (cause instanceof IOException) { if (cause instanceof IOException) {
throw (IOException) cause; throw (IOException) cause;
} else if (cause instanceof Error) { } else if (cause instanceof Error) {

View File

@ -0,0 +1,479 @@
// Code source of this file:
// http://grepcode.com/file/repo1.maven.org/maven2/
// org.apache.maven/maven-artifact/3.1.1/
// org/apache/maven/artifact/versioning/ComparableVersion.java/
//
// Modifications made on top of the source:
// 1. Changed
// package org.apache.maven.artifact.versioning;
// to
// package org.apache.hadoop.util;
// 2. Removed author tags to clear hadoop author tag warning
// author <a href="mailto:kenney@apache.org">Kenney Westerhof</a>
// author <a href="mailto:hboutemy@apache.org">Hervé Boutemy</a>
//
package org.apache.hadoop.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Properties;
import java.util.Stack;
/**
 * Generic implementation of version comparison.
 *
 * <p>Features:</p>
 * <ul>
 * <li>mixing of '<code>-</code>' (dash) and '<code>.</code>' (dot) separators,</li>
 * <li>transition between characters and digits also constitutes a separator:
 *     <code>1.0alpha1 =&gt; [1, 0, alpha, 1]</code></li>
 * <li>unlimited number of version components,</li>
 * <li>version components in the text can be digits or strings,</li>
 * <li>strings are checked for well-known qualifiers and the qualifier ordering is used for version ordering.
 *     Well-known qualifiers (case insensitive) are:<ul>
 *     <li><code>alpha</code> or <code>a</code></li>
 *     <li><code>beta</code> or <code>b</code></li>
 *     <li><code>milestone</code> or <code>m</code></li>
 *     <li><code>rc</code> or <code>cr</code></li>
 *     <li><code>snapshot</code></li>
 *     <li><code>(the empty string)</code> or <code>ga</code> or <code>final</code></li>
 *     <li><code>sp</code></li>
 *     </ul>
 *     Unknown qualifiers are considered after known qualifiers, with lexical order (always case insensitive),
 *   </li>
 * <li>a dash usually precedes a qualifier, and is always less important than something preceded with a dot.</li>
 * </ul>
 *
 * <p>Trailing "null" components (<code>0</code>, the empty qualifier and its aliases, empty sub-lists) are
 * dropped while parsing, so <code>1.0</code>, <code>1.0.0</code> and <code>1-ga</code> all compare equal to
 * <code>1</code>.</p>
 *
 * @see <a href="https://cwiki.apache.org/confluence/display/MAVENOLD/Versioning">"Versioning" on Maven Wiki</a>
 */
public class ComparableVersion
    implements Comparable<ComparableVersion>
{
    /** The version string exactly as last passed to {@link #parseVersion(String)}. */
    private String value;

    /** String rendering of the normalized item tree; the basis of {@link #equals} and {@link #hashCode}. */
    private String canonical;

    /** Root of the parsed and normalized item tree. */
    private ListItem items;

    /**
     * A single component of a parsed version: a number, a qualifier string or a nested list.
     */
    private interface Item
    {
        int INTEGER_ITEM = 0;
        int STRING_ITEM = 1;
        int LIST_ITEM = 2;

        /**
         * Compares this item with another one; <code>item</code> may be <code>null</code>, meaning
         * "the other version has no component at this position".
         */
        int compareTo( Item item );

        /** Returns one of {@link #INTEGER_ITEM}, {@link #STRING_ITEM} or {@link #LIST_ITEM}. */
        int getType();

        /** Returns true when this item is equivalent to an absent component and may be trimmed. */
        boolean isNull();
    }

    /**
     * Represents a numeric item in the version item list.
     */
    private static class IntegerItem
        implements Item
    {
        private final BigInteger value;

        public static final IntegerItem ZERO = new IntegerItem();

        private IntegerItem()
        {
            this.value = BigInteger.ZERO;
        }

        public IntegerItem( String str )
        {
            this.value = new BigInteger( str );
        }

        @Override
        public int getType()
        {
            return INTEGER_ITEM;
        }

        @Override
        public boolean isNull()
        {
            return BigInteger.ZERO.equals( value );
        }

        @Override
        public int compareTo( Item item )
        {
            if ( item == null )
            {
                return BigInteger.ZERO.equals( value ) ? 0 : 1; // 1.0 == 1, 1.1 > 1
            }

            switch ( item.getType() )
            {
                case INTEGER_ITEM:
                    return value.compareTo( ( (IntegerItem) item ).value );

                case STRING_ITEM:
                    return 1; // 1.1 > 1-sp

                case LIST_ITEM:
                    return 1; // 1.1 > 1-1

                default:
                    throw new RuntimeException( "invalid item: " + item.getClass() );
            }
        }

        @Override
        public String toString()
        {
            return value.toString();
        }
    }

    /**
     * Represents a string in the version item list, usually a qualifier.
     */
    private static class StringItem
        implements Item
    {
        private static final String[] QUALIFIERS = { "alpha", "beta", "milestone", "rc", "snapshot", "", "sp" };

        private static final List<String> _QUALIFIERS = Arrays.asList( QUALIFIERS );

        private static final Properties ALIASES = new Properties();
        static
        {
            ALIASES.put( "ga", "" );
            ALIASES.put( "final", "" );
            ALIASES.put( "cr", "rc" );
        }

        /**
         * A comparable value for the empty-string qualifier. This one is used to determine if a given qualifier makes
         * the version older than one without a qualifier, or more recent.
         */
        private static final String RELEASE_VERSION_INDEX = String.valueOf( _QUALIFIERS.indexOf( "" ) );

        private final String value;

        /**
         * @param value the (already lower-cased) qualifier text
         * @param followedByDigit whether a digit immediately follows the text in the version string; only then
         *            are the one-letter shorthands <code>a</code>, <code>b</code> and <code>m</code> expanded
         */
        public StringItem( String value, boolean followedByDigit )
        {
            if ( followedByDigit && value.length() == 1 )
            {
                // a1 = alpha-1, b1 = beta-1, m1 = milestone-1
                switch ( value.charAt( 0 ) )
                {
                    case 'a':
                        value = "alpha";
                        break;
                    case 'b':
                        value = "beta";
                        break;
                    case 'm':
                        value = "milestone";
                        break;
                    default:
                        // any other single letter is kept as an ordinary qualifier
                        break;
                }
            }
            this.value = ALIASES.getProperty( value, value );
        }

        @Override
        public int getType()
        {
            return STRING_ITEM;
        }

        @Override
        public boolean isNull()
        {
            return ( comparableQualifier( value ).compareTo( RELEASE_VERSION_INDEX ) == 0 );
        }

        /**
         * Returns a comparable value for a qualifier.
         *
         * This method takes into account the ordering of known qualifiers then unknown qualifiers with lexical
         * ordering.
         *
         * just returning an Integer with the index here is faster, but requires a lot of if/then/else to check for -1
         * or QUALIFIERS.size and then resort to lexical ordering. Most comparisons are decided by the first character,
         * so this is still fast. If more characters are needed then it requires a lexical sort anyway.
         *
         * @param qualifier the qualifier text to translate
         * @return an equivalent value that can be used with lexical comparison
         */
        public static String comparableQualifier( String qualifier )
        {
            int i = _QUALIFIERS.indexOf( qualifier );

            return i == -1 ? ( _QUALIFIERS.size() + "-" + qualifier ) : String.valueOf( i );
        }

        @Override
        public int compareTo( Item item )
        {
            if ( item == null )
            {
                // 1-rc < 1, 1-ga > 1
                return comparableQualifier( value ).compareTo( RELEASE_VERSION_INDEX );
            }
            switch ( item.getType() )
            {
                case INTEGER_ITEM:
                    return -1; // 1.any < 1.1 ?

                case STRING_ITEM:
                    return comparableQualifier( value ).compareTo( comparableQualifier( ( (StringItem) item ).value ) );

                case LIST_ITEM:
                    return -1; // 1.any < 1-1

                default:
                    throw new RuntimeException( "invalid item: " + item.getClass() );
            }
        }

        @Override
        public String toString()
        {
            return value;
        }
    }

    /**
     * Represents a version list item. This class is used both for the global item list and for sub-lists (which start
     * with '-(number)' in the version specification).
     */
    private static class ListItem
        extends ArrayList<Item>
        implements Item
    {
        // ArrayList is Serializable, so pin the stream version explicitly.
        private static final long serialVersionUID = 1L;

        @Override
        public int getType()
        {
            return LIST_ITEM;
        }

        @Override
        public boolean isNull()
        {
            return ( size() == 0 );
        }

        /**
         * Strips trailing items that report {@link Item#isNull()}, stopping at the first one that does not.
         */
        void normalize()
        {
            for ( ListIterator<Item> iterator = listIterator( size() ); iterator.hasPrevious(); )
            {
                Item item = iterator.previous();
                if ( item.isNull() )
                {
                    iterator.remove(); // remove null trailing items: 0, "", empty list
                }
                else
                {
                    break;
                }
            }
        }

        @Override
        public int compareTo( Item item )
        {
            if ( item == null )
            {
                if ( size() == 0 )
                {
                    return 0; // 1-0 = 1- (normalize) = 1
                }
                Item first = get( 0 );
                return first.compareTo( null );
            }
            switch ( item.getType() )
            {
                case INTEGER_ITEM:
                    return -1; // 1-1 < 1.0.x

                case STRING_ITEM:
                    return 1; // 1-1 > 1-sp

                case LIST_ITEM:
                    Iterator<Item> left = iterator();
                    Iterator<Item> right = ( (ListItem) item ).iterator();

                    while ( left.hasNext() || right.hasNext() )
                    {
                        Item l = left.hasNext() ? left.next() : null;
                        Item r = right.hasNext() ? right.next() : null;

                        // if this is shorter, then invert the compare and mul with -1;
                        // r cannot be null here because the loop condition guarantees one side has an element
                        int result = l == null ? -1 * r.compareTo( l ) : l.compareTo( r );

                        if ( result != 0 )
                        {
                            return result;
                        }
                    }

                    return 0;

                default:
                    throw new RuntimeException( "invalid item: " + item.getClass() );
            }
        }

        @Override
        public String toString()
        {
            StringBuilder buffer = new StringBuilder( "(" );
            for ( Iterator<Item> iter = iterator(); iter.hasNext(); )
            {
                buffer.append( iter.next() );
                if ( iter.hasNext() )
                {
                    buffer.append( ',' );
                }
            }
            buffer.append( ')' );
            return buffer.toString();
        }
    }

    /**
     * Creates a comparable version by parsing the given version string.
     *
     * @param version the version string to parse; must not be null
     * @throws NullPointerException if <code>version</code> is null
     */
    public ComparableVersion( String version )
    {
        parseVersion( version );
    }

    /**
     * Parses <code>version</code> and replaces the state of this instance with the result.
     *
     * <p>The argument is validated before anything is modified and the new state is only stored once parsing
     * has finished, so a rejected argument leaves the instance exactly as it was.</p>
     *
     * @param version the version string to parse; must not be null
     * @throws NullPointerException if <code>version</code> is null
     */
    public final void parseVersion( String version )
    {
        if ( version == null )
        {
            throw new NullPointerException( "version must not be null" );
        }

        String lowered = version.toLowerCase( Locale.ENGLISH );
        int length = lowered.length();

        ListItem root = new ListItem();
        ListItem list = root;

        // every list created while parsing, outermost first; normalized innermost-first at the end
        List<ListItem> openLists = new ArrayList<ListItem>();
        openLists.add( list );

        boolean isDigit = false;
        int startIndex = 0;

        for ( int i = 0; i < length; i++ )
        {
            char c = lowered.charAt( i );

            if ( c == '.' || c == '-' )
            {
                // an empty component (two adjacent separators, or a leading one) counts as 0
                list.add( i == startIndex
                    ? IntegerItem.ZERO
                    : parseItem( isDigit, lowered.substring( startIndex, i ) ) );
                startIndex = i + 1;

                if ( c == '-' && isDigit )
                {
                    list.normalize(); // 1.0-* = 1-*

                    if ( ( i + 1 < length ) && Character.isDigit( lowered.charAt( i + 1 ) ) )
                    {
                        // new ListItem only if previous were digits and new char is a digit,
                        // ie need to differentiate only 1.1 from 1-1
                        ListItem subList = new ListItem();
                        list.add( subList );
                        list = subList;
                        openLists.add( subList );
                    }
                }
            }
            else if ( Character.isDigit( c ) )
            {
                if ( !isDigit && i > startIndex )
                {
                    // letters directly followed by a digit: 1.0alpha1 => [1, 0, alpha, 1]
                    list.add( new StringItem( lowered.substring( startIndex, i ), true ) );
                    startIndex = i;
                }

                isDigit = true;
            }
            else
            {
                if ( isDigit && i > startIndex )
                {
                    // digits directly followed by a letter also end the numeric component
                    list.add( parseItem( true, lowered.substring( startIndex, i ) ) );
                    startIndex = i;
                }

                isDigit = false;
            }
        }

        if ( length > startIndex )
        {
            list.add( parseItem( isDigit, lowered.substring( startIndex ) ) );
        }

        // innermost lists first, so that a sub-list that became empty is trimmed from its parent
        for ( int i = openLists.size() - 1; i >= 0; i-- )
        {
            openLists.get( i ).normalize();
        }

        this.value = version;
        this.items = root;
        this.canonical = root.toString();
    }

    private static Item parseItem( boolean isDigit, String buf )
    {
        return isDigit ? new IntegerItem( buf ) : new StringItem( buf, false );
    }

    /**
     * Compares the parsed item trees of the two versions.
     *
     * @param o the version to compare with; must not be null
     * @return a negative integer, zero or a positive integer as this version precedes, equals or follows
     *         <code>o</code>
     */
    @Override
    public int compareTo( ComparableVersion o )
    {
        return items.compareTo( o.items );
    }

    /**
     * @return the version string exactly as it was supplied, not the canonical form
     */
    @Override
    public String toString()
    {
        return value;
    }

    @Override
    public boolean equals( Object o )
    {
        return ( o instanceof ComparableVersion ) && canonical.equals( ( (ComparableVersion) o ).canonical );
    }

    @Override
    public int hashCode()
    {
        return canonical.hashCode();
    }
}

View File

@ -17,55 +17,17 @@
*/ */
package org.apache.hadoop.util; package org.apache.hadoop.util;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import com.google.common.collect.ComparisonChain; /**
* A wrapper class to maven's ComparableVersion class, to comply
* with maven's version name string convention
*/
@InterfaceAudience.Private @InterfaceAudience.Private
public abstract class VersionUtil { public abstract class VersionUtil {
private static final Pattern COMPONENT_GROUPS = Pattern.compile("(\\d+)|(\\D+)");
/** /**
* Suffix added by maven for nightly builds and other snapshot releases. * Compares two version name strings using maven's ComparableVersion class.
* These releases are considered to precede the non-SNAPSHOT version *
* with the same version number.
*/
private static final String SNAPSHOT_SUFFIX = "-SNAPSHOT";
/**
* This function splits the two versions on &quot;.&quot; and performs a
* naturally-ordered comparison of the resulting components. For example, the
* version string "0.3" is considered to precede "0.20", despite the fact that
* lexical comparison would consider "0.20" to precede "0.3". This method of
* comparison is similar to the method used by package versioning systems like
* deb and RPM.
*
* Version components are compared numerically whenever possible, however a
* version component can contain non-numeric characters. When a non-numeric
* group of characters is found in a version component, this group is compared
* with the similarly-indexed group in the other version component. If the
* other group is numeric, then the numeric group is considered to precede the
* non-numeric group. If both groups are non-numeric, then a lexical
* comparison is performed.
*
* If two versions have a different number of components, then only the lower
* number of components are compared. If those components are identical
* between the two versions, then the version with fewer components is
* considered to precede the version with more components.
*
* In addition to the above rules, there is one special case: maven SNAPSHOT
* releases are considered to precede a non-SNAPSHOT release with an
* otherwise identical version number. For example, 2.0-SNAPSHOT precedes
* 2.0.
*
* This function returns a negative integer if version1 precedes version2, a
* positive integer if version2 precedes version1, and 0 if and only if the
* two versions' components are identical in value and cardinality.
*
* @param version1 * @param version1
* the first version to compare * the first version to compare
* @param version2 * @param version2
@ -75,58 +37,8 @@ public abstract class VersionUtil {
* versions are equal. * versions are equal.
*/ */
public static int compareVersions(String version1, String version2) { public static int compareVersions(String version1, String version2) {
boolean isSnapshot1 = version1.endsWith(SNAPSHOT_SUFFIX); ComparableVersion v1 = new ComparableVersion(version1);
boolean isSnapshot2 = version2.endsWith(SNAPSHOT_SUFFIX); ComparableVersion v2 = new ComparableVersion(version2);
version1 = stripSnapshotSuffix(version1); return v1.compareTo(v2);
version2 = stripSnapshotSuffix(version2);
String[] version1Parts = version1.split("\\.");
String[] version2Parts = version2.split("\\.");
for (int i = 0; i < version1Parts.length && i < version2Parts.length; i++) {
String component1 = version1Parts[i];
String component2 = version2Parts[i];
if (!component1.equals(component2)) {
Matcher matcher1 = COMPONENT_GROUPS.matcher(component1);
Matcher matcher2 = COMPONENT_GROUPS.matcher(component2);
while (matcher1.find() && matcher2.find()) {
String group1 = matcher1.group();
String group2 = matcher2.group();
if (!group1.equals(group2)) {
if (isNumeric(group1) && isNumeric(group2)) {
return Integer.parseInt(group1) - Integer.parseInt(group2);
} else if (!isNumeric(group1) && !isNumeric(group2)) {
return group1.compareTo(group2);
} else {
return isNumeric(group1) ? -1 : 1;
}
}
}
return component1.length() - component2.length();
}
}
return ComparisonChain.start()
.compare(version1Parts.length, version2Parts.length)
.compare(isSnapshot2, isSnapshot1)
.result();
}
private static String stripSnapshotSuffix(String version) {
if (version.endsWith(SNAPSHOT_SUFFIX)) {
return version.substring(0, version.length() - SNAPSHOT_SUFFIX.length());
} else {
return version;
}
}
private static boolean isNumeric(String s) {
try {
Integer.parseInt(s);
return true;
} catch (NumberFormatException nfe) {
return false;
}
} }
} }

View File

@ -18,8 +18,6 @@
Hadoop MapReduce Next Generation - CLI MiniCluster. Hadoop MapReduce Next Generation - CLI MiniCluster.
\[ {{{./index.html}Go Back}} \]
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
* {Purpose} * {Purpose}
@ -42,7 +40,8 @@ Hadoop MapReduce Next Generation - CLI MiniCluster.
$ mvn clean install -DskipTests $ mvn clean install -DskipTests
$ mvn package -Pdist -Dtar -DskipTests -Dmaven.javadoc.skip $ mvn package -Pdist -Dtar -DskipTests -Dmaven.javadoc.skip
+---+ +---+
<<NOTE:>> You will need protoc 2.5.0 installed. <<NOTE:>> You will need {{{http://code.google.com/p/protobuf/}protoc 2.5.0}}
installed.
The tarball should be available in <<<hadoop-dist/target/>>> directory. The tarball should be available in <<<hadoop-dist/target/>>> directory.

View File

@ -16,8 +16,6 @@
--- ---
${maven.build.timestamp} ${maven.build.timestamp}
\[ {{{../index.html}Go Back}} \]
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
Hadoop MapReduce Next Generation - Cluster Setup Hadoop MapReduce Next Generation - Cluster Setup
@ -29,7 +27,7 @@ Hadoop MapReduce Next Generation - Cluster Setup
with thousands of nodes. with thousands of nodes.
To play with Hadoop, you may first want to install it on a single To play with Hadoop, you may first want to install it on a single
machine (see {{{SingleCluster}Single Node Setup}}). machine (see {{{./SingleCluster.html}Single Node Setup}}).
* {Prerequisites} * {Prerequisites}
@ -571,440 +569,6 @@ $ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh stop proxyserver --config $HADOOP_CONF_D
$ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh stop historyserver --config $HADOOP_CONF_DIR $ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh stop historyserver --config $HADOOP_CONF_DIR
---- ----
* {Running Hadoop in Secure Mode}
This section deals with important parameters to be specified in order
to run Hadoop in <<secure mode>> with strong, Kerberos-based
authentication.
* <<<User Accounts for Hadoop Daemons>>>
Ensure that HDFS and YARN daemons run as different Unix users, e.g.
<<<hdfs>>> and <<<yarn>>>. Also, ensure that the MapReduce JobHistory
server runs as user <<<mapred>>>.
It's recommended to have them share a Unix group, e.g. <<<hadoop>>>.
*---------------+----------------------------------------------------------------------+
|| User:Group || Daemons |
*---------------+----------------------------------------------------------------------+
| hdfs:hadoop | NameNode, Secondary NameNode, Checkpoint Node, Backup Node, DataNode |
*---------------+----------------------------------------------------------------------+
| yarn:hadoop | ResourceManager, NodeManager |
*---------------+----------------------------------------------------------------------+
| mapred:hadoop | MapReduce JobHistory Server |
*---------------+----------------------------------------------------------------------+
* <<<Permissions for both HDFS and local fileSystem paths>>>
The following table lists various paths on HDFS and local filesystems (on
all nodes) and recommended permissions:
*-------------------+-------------------+------------------+------------------+
|| Filesystem || Path || User:Group || Permissions |
*-------------------+-------------------+------------------+------------------+
| local | <<<dfs.namenode.name.dir>>> | hdfs:hadoop | drwx------ |
*-------------------+-------------------+------------------+------------------+
| local | <<<dfs.datanode.data.dir>>> | hdfs:hadoop | drwx------ |
*-------------------+-------------------+------------------+------------------+
| local | $HADOOP_LOG_DIR | hdfs:hadoop | drwxrwxr-x |
*-------------------+-------------------+------------------+------------------+
| local | $YARN_LOG_DIR | yarn:hadoop | drwxrwxr-x |
*-------------------+-------------------+------------------+------------------+
| local | <<<yarn.nodemanager.local-dirs>>> | yarn:hadoop | drwxr-xr-x |
*-------------------+-------------------+------------------+------------------+
| local | <<<yarn.nodemanager.log-dirs>>> | yarn:hadoop | drwxr-xr-x |
*-------------------+-------------------+------------------+------------------+
| local | container-executor | root:hadoop | --Sr-s--- |
*-------------------+-------------------+------------------+------------------+
| local | <<<conf/container-executor.cfg>>> | root:hadoop | r-------- |
*-------------------+-------------------+------------------+------------------+
| hdfs | / | hdfs:hadoop | drwxr-xr-x |
*-------------------+-------------------+------------------+------------------+
| hdfs | /tmp | hdfs:hadoop | drwxrwxrwxt |
*-------------------+-------------------+------------------+------------------+
| hdfs | /user | hdfs:hadoop | drwxr-xr-x |
*-------------------+-------------------+------------------+------------------+
| hdfs | <<<yarn.nodemanager.remote-app-log-dir>>> | yarn:hadoop | drwxrwxrwxt |
*-------------------+-------------------+------------------+------------------+
| hdfs | <<<mapreduce.jobhistory.intermediate-done-dir>>> | mapred:hadoop | |
| | | | drwxrwxrwxt |
*-------------------+-------------------+------------------+------------------+
| hdfs | <<<mapreduce.jobhistory.done-dir>>> | mapred:hadoop | |
| | | | drwxr-x--- |
*-------------------+-------------------+------------------+------------------+
* Kerberos Keytab files
* HDFS
The NameNode keytab file, on the NameNode host, should look like the
following:
----
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/nn.service.keytab
Keytab name: FILE:/etc/security/keytab/nn.service.keytab
KVNO Timestamp Principal
4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
The Secondary NameNode keytab file, on that host, should look like the
following:
----
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/sn.service.keytab
Keytab name: FILE:/etc/security/keytab/sn.service.keytab
KVNO Timestamp Principal
4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
The DataNode keytab file, on each host, should look like the following:
----
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/dn.service.keytab
Keytab name: FILE:/etc/security/keytab/dn.service.keytab
KVNO Timestamp Principal
4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
* YARN
The ResourceManager keytab file, on the ResourceManager host, should look
like the following:
----
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/rm.service.keytab
Keytab name: FILE:/etc/security/keytab/rm.service.keytab
KVNO Timestamp Principal
4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
The NodeManager keytab file, on each host, should look like the following:
----
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/nm.service.keytab
Keytab name: FILE:/etc/security/keytab/nm.service.keytab
KVNO Timestamp Principal
4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
* MapReduce JobHistory Server
The MapReduce JobHistory Server keytab file, on that host, should look
like the following:
----
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/jhs.service.keytab
Keytab name: FILE:/etc/security/keytab/jhs.service.keytab
KVNO Timestamp Principal
4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
** Configuration in Secure Mode
* <<<conf/core-site.xml>>>
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<hadoop.security.authentication>>> | <kerberos> | <simple> is non-secure. |
*-------------------------+-------------------------+------------------------+
| <<<hadoop.security.authorization>>> | <true> | |
| | | Enable RPC service-level authorization. |
*-------------------------+-------------------------+------------------------+
* <<<conf/hdfs-site.xml>>>
* Configurations for NameNode:
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<dfs.block.access.token.enable>>> | <true> | |
| | | Enable HDFS block access tokens for secure operations. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.https.enable>>> | <true> | |
| | | This value is deprecated. Use dfs.http.policy |
*-------------------------+-------------------------+------------------------+
| <<<dfs.http.policy>>> | <HTTP_ONLY> or <HTTPS_ONLY> or <HTTP_AND_HTTPS> | |
| | | HTTPS_ONLY turns off http access |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.https-address>>> | <nn_host_fqdn:50470> | |
*-------------------------+-------------------------+------------------------+
| <<<dfs.https.port>>> | <50470> | |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.keytab.file>>> | </etc/security/keytab/nn.service.keytab> | |
| | | Kerberos keytab file for the NameNode. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.kerberos.principal>>> | nn/_HOST@REALM.TLD | |
| | | Kerberos principal name for the NameNode. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.kerberos.https.principal>>> | host/_HOST@REALM.TLD | |
| | | HTTPS Kerberos principal name for the NameNode. |
*-------------------------+-------------------------+------------------------+
* Configurations for Secondary NameNode:
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.secondary.http-address>>> | <c_nn_host_fqdn:50090> | |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.secondary.https-port>>> | <50470> | |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.secondary.keytab.file>>> | | |
| | </etc/security/keytab/sn.service.keytab> | |
| | | Kerberos keytab file for the Secondary NameNode. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.secondary.kerberos.principal>>> | sn/_HOST@REALM.TLD | |
| | | Kerberos principal name for the Secondary NameNode. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.secondary.kerberos.https.principal>>> | | |
| | host/_HOST@REALM.TLD | |
| | | HTTPS Kerberos principal name for the Secondary NameNode. |
*-------------------------+-------------------------+------------------------+
* Configurations for DataNode:
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<dfs.datanode.data.dir.perm>>> | 700 | |
*-------------------------+-------------------------+------------------------+
| <<<dfs.datanode.address>>> | <0.0.0.0:2003> | |
*-------------------------+-------------------------+------------------------+
| <<<dfs.datanode.https.address>>> | <0.0.0.0:2005> | |
*-------------------------+-------------------------+------------------------+
| <<<dfs.datanode.keytab.file>>> | </etc/security/keytab/dn.service.keytab> | |
| | | Kerberos keytab file for the DataNode. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.datanode.kerberos.principal>>> | dn/_HOST@REALM.TLD | |
| | | Kerberos principal name for the DataNode. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.datanode.kerberos.https.principal>>> | | |
| | host/_HOST@REALM.TLD | |
| | | HTTPS Kerberos principal name for the DataNode. |
*-------------------------+-------------------------+------------------------+
* <<<conf/yarn-site.xml>>>
* WebAppProxy
The <<<WebAppProxy>>> provides a proxy between the web applications
exported by an application and an end user. If security is enabled
it will warn users before accessing a potentially unsafe web application.
Authentication and authorization using the proxy is handled just like
any other privileged web application.
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<yarn.web-proxy.address>>> | | |
| | <<<WebAppProxy>>> host:port for proxy to AM web apps. | |
| | | <host:port> if this is the same as <<<yarn.resourcemanager.webapp.address>>>|
| | | or it is not defined then the <<<ResourceManager>>> will run the proxy|
| | | otherwise a standalone proxy server will need to be launched.|
*-------------------------+-------------------------+------------------------+
| <<<yarn.web-proxy.keytab>>> | | |
| | </etc/security/keytab/web-app.service.keytab> | |
| | | Kerberos keytab file for the WebAppProxy. |
*-------------------------+-------------------------+------------------------+
| <<<yarn.web-proxy.principal>>> | wap/_HOST@REALM.TLD | |
| | | Kerberos principal name for the WebAppProxy. |
*-------------------------+-------------------------+------------------------+
* LinuxContainerExecutor
A <<<ContainerExecutor>>> is used by the YARN framework to define how any
<container> is launched and controlled.
The following are available in Hadoop YARN:
*--------------------------------------+--------------------------------------+
|| ContainerExecutor || Description |
*--------------------------------------+--------------------------------------+
| <<<DefaultContainerExecutor>>> | |
| | The default executor which YARN uses to manage container execution. |
| | The container process has the same Unix user as the NodeManager. |
*--------------------------------------+--------------------------------------+
| <<<LinuxContainerExecutor>>> | |
| | Supported only on GNU/Linux, this executor runs the containers as either the |
| | YARN user who submitted the application (when full security is enabled) or |
| | as a dedicated user (defaults to nobody) when full security is not enabled. |
| | When full security is enabled, this executor requires all user accounts to be |
| | created on the cluster nodes where the containers are launched. It uses |
| | a <setuid> executable that is included in the Hadoop distribution. |
| | The NodeManager uses this executable to launch and kill containers. |
| | The setuid executable switches to the user who has submitted the |
| | application and launches or kills the containers. For maximum security, |
| | this executor sets up restricted permissions and user/group ownership of |
| | local files and directories used by the containers such as the shared |
| | objects, jars, intermediate files, log files etc. Particularly note that, |
| | because of this, except the application owner and NodeManager, no other |
| | user can access any of the local files/directories including those |
| | localized as part of the distributed cache. |
*--------------------------------------+--------------------------------------+
To build the LinuxContainerExecutor executable run:
----
$ mvn package -Dcontainer-executor.conf.dir=/etc/hadoop/
----
The path passed in <<<-Dcontainer-executor.conf.dir>>> should be the
path on the cluster nodes where a configuration file for the setuid
executable should be located. The executable should be installed in
$HADOOP_YARN_HOME/bin.
The executable must have specific permissions: 6050 or --Sr-s---
permissions user-owned by <root> (super-user) and group-owned by a
special group (e.g. <<<hadoop>>>) of which the NodeManager Unix user is
the group member and no ordinary application user is. If any application
user belongs to this special group, security will be compromised. This
special group name should be specified for the configuration property
<<<yarn.nodemanager.linux-container-executor.group>>> in both
<<<conf/yarn-site.xml>>> and <<<conf/container-executor.cfg>>>.
For example, let's say that the NodeManager is run as user <yarn> who is
part of the groups <users> and <hadoop>, either of them being the primary group.
Suppose also that <users> has both <yarn> and another user
(application submitter) <alice> as its members, and <alice> does not
belong to <hadoop>. Going by the above description, the setuid/setgid
executable should be set 6050 or --Sr-s--- with user-owner as <yarn> and
group-owner as <hadoop> which has <yarn> as its member (and not <users>
which has <alice> also as its member besides <yarn>).
The LinuxTaskController requires that paths including and leading up to
the directories specified in <<<yarn.nodemanager.local-dirs>>> and
<<<yarn.nodemanager.log-dirs>>> be set to 755 permissions as described
above in the table on permissions on directories.
* <<<conf/container-executor.cfg>>>
The executable requires a configuration file called
<<<container-executor.cfg>>> to be present in the configuration
directory passed to the mvn target mentioned above.
The configuration file must be owned by the user running NodeManager
(user <<<yarn>>> in the above example), group-owned by anyone and
should have the permissions 0400 or r--------.
The executable requires the following configuration items to be present
in the <<<conf/container-executor.cfg>>> file. The items should be
mentioned as simple key=value pairs, one per-line:
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<yarn.nodemanager.linux-container-executor.group>>> | <hadoop> | |
| | | Unix group of the NodeManager. The group owner of the |
| | |<container-executor> binary should be this group. Should be same as the |
| | | value with which the NodeManager is configured. This configuration is |
| | | required for validating the secure access of the <container-executor> |
| | | binary. |
*-------------------------+-------------------------+------------------------+
| <<<banned.users>>> | hdfs,yarn,mapred,bin | Banned users. |
*-------------------------+-------------------------+------------------------+
| <<<allowed.system.users>>> | foo,bar | Allowed system users. |
*-------------------------+-------------------------+------------------------+
| <<<min.user.id>>> | 1000 | Prevent other super-users. |
*-------------------------+-------------------------+------------------------+
To re-cap, here are the local file-system permissions required for the
various paths related to the <<<LinuxContainerExecutor>>>:
*-------------------+-------------------+------------------+------------------+
|| Filesystem || Path || User:Group || Permissions |
*-------------------+-------------------+------------------+------------------+
| local | container-executor | root:hadoop | --Sr-s--- |
*-------------------+-------------------+------------------+------------------+
| local | <<<conf/container-executor.cfg>>> | root:hadoop | r-------- |
*-------------------+-------------------+------------------+------------------+
| local | <<<yarn.nodemanager.local-dirs>>> | yarn:hadoop | drwxr-xr-x |
*-------------------+-------------------+------------------+------------------+
| local | <<<yarn.nodemanager.log-dirs>>> | yarn:hadoop | drwxr-xr-x |
*-------------------+-------------------+------------------+------------------+
* Configurations for ResourceManager:
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<yarn.resourcemanager.keytab>>> | | |
| | </etc/security/keytab/rm.service.keytab> | |
| | | Kerberos keytab file for the ResourceManager. |
*-------------------------+-------------------------+------------------------+
| <<<yarn.resourcemanager.principal>>> | rm/_HOST@REALM.TLD | |
| | | Kerberos principal name for the ResourceManager. |
*-------------------------+-------------------------+------------------------+
* Configurations for NodeManager:
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<yarn.nodemanager.keytab>>> | </etc/security/keytab/nm.service.keytab> | |
| | | Kerberos keytab file for the NodeManager. |
*-------------------------+-------------------------+------------------------+
| <<<yarn.nodemanager.principal>>> | nm/_HOST@REALM.TLD | |
| | | Kerberos principal name for the NodeManager. |
*-------------------------+-------------------------+------------------------+
| <<<yarn.nodemanager.container-executor.class>>> | | |
| | <<<org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor>>> | |
| | | Use LinuxContainerExecutor. |
*-------------------------+-------------------------+------------------------+
| <<<yarn.nodemanager.linux-container-executor.group>>> | <hadoop> | |
| | | Unix group of the NodeManager. |
*-------------------------+-------------------------+------------------------+
* <<<conf/mapred-site.xml>>>
* Configurations for MapReduce JobHistory Server:
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<mapreduce.jobhistory.address>>> | | |
| | MapReduce JobHistory Server <host:port> | Default port is 10020. |
*-------------------------+-------------------------+------------------------+
| <<<mapreduce.jobhistory.keytab>>> | | |
| | </etc/security/keytab/jhs.service.keytab> | |
| | | Kerberos keytab file for the MapReduce JobHistory Server. |
*-------------------------+-------------------------+------------------------+
| <<<mapreduce.jobhistory.principal>>> | jhs/_HOST@REALM.TLD | |
| | | Kerberos principal name for the MapReduce JobHistory Server. |
*-------------------------+-------------------------+------------------------+
* {Operating the Hadoop Cluster} * {Operating the Hadoop Cluster}

View File

@ -44,8 +44,9 @@ Overview
Generic Options Generic Options
The following options are supported by {{dfsadmin}}, {{fs}}, {{fsck}}, The following options are supported by {{dfsadmin}}, {{fs}}, {{fsck}},
{{job}} and {{fetchdt}}. Applications should implement {{{some_useful_url}Tool}} to support {{job}} and {{fetchdt}}. Applications should implement
{{{another_useful_url}GenericOptions}}. {{{../../api/org/apache/hadoop/util/Tool.html}Tool}} to support
GenericOptions.
*------------------------------------------------+-----------------------------+ *------------------------------------------------+-----------------------------+
|| GENERIC_OPTION || Description || GENERIC_OPTION || Description
@ -123,7 +124,8 @@ User Commands
* <<<fsck>>> * <<<fsck>>>
Runs a HDFS filesystem checking utility. See {{Fsck}} for more info. Runs a HDFS filesystem checking utility.
See {{{../hadoop-hdfs/HdfsUserGuide.html#fsck}fsck}} for more info.
Usage: <<<hadoop fsck [GENERIC_OPTIONS] <path> [-move | -delete | -openforwrite] [-files [-blocks [-locations | -racks]]]>>> Usage: <<<hadoop fsck [GENERIC_OPTIONS] <path> [-move | -delete | -openforwrite] [-files [-blocks [-locations | -racks]]]>>>
@ -149,7 +151,8 @@ User Commands
* <<<fetchdt>>> * <<<fetchdt>>>
Gets Delegation Token from a NameNode. See {{fetchdt}} for more info. Gets Delegation Token from a NameNode.
See {{{../hadoop-hdfs/HdfsUserGuide.html#fetchdt}fetchdt}} for more info.
Usage: <<<hadoop fetchdt [GENERIC_OPTIONS] [--webservice <namenode_http_addr>] <path> >>> Usage: <<<hadoop fetchdt [GENERIC_OPTIONS] [--webservice <namenode_http_addr>] <path> >>>
@ -302,7 +305,8 @@ Administration Commands
* <<<balancer>>> * <<<balancer>>>
Runs a cluster balancing utility. An administrator can simply press Ctrl-C Runs a cluster balancing utility. An administrator can simply press Ctrl-C
to stop the rebalancing process. See Rebalancer for more details. to stop the rebalancing process. See
{{{../hadoop-hdfs/HdfsUserGuide.html#Rebalancer}Rebalancer}} for more details.
Usage: <<<hadoop balancer [-threshold <threshold>]>>> Usage: <<<hadoop balancer [-threshold <threshold>]>>>
@ -445,7 +449,7 @@ Administration Commands
* <<<namenode>>> * <<<namenode>>>
Runs the namenode. More info about the upgrade, rollback and finalize is Runs the namenode. More info about the upgrade, rollback and finalize is
at Upgrade Rollback at {{{../hadoop-hdfs/HdfsUserGuide.html#Upgrade_and_Rollback}Upgrade Rollback}}.
Usage: <<<hadoop namenode [-format] | [-upgrade] | [-rollback] | [-finalize] | [-importCheckpoint]>>> Usage: <<<hadoop namenode [-format] | [-upgrade] | [-rollback] | [-finalize] | [-importCheckpoint]>>>
@ -474,8 +478,9 @@ Administration Commands
* <<<secondarynamenode>>> * <<<secondarynamenode>>>
Runs the HDFS secondary namenode. See Secondary Namenode for more Runs the HDFS secondary namenode.
info. See {{{../hadoop-hdfs/HdfsUserGuide.html#Secondary_NameNode}Secondary Namenode}}
for more info.
Usage: <<<hadoop secondarynamenode [-checkpoint [force]] | [-geteditsize]>>> Usage: <<<hadoop secondarynamenode [-checkpoint [force]] | [-geteditsize]>>>

View File

@ -233,9 +233,10 @@ hand-in-hand to address this.
* In particular for MapReduce applications, the developer community will * In particular for MapReduce applications, the developer community will
try our best to support provide binary compatibility across major try our best to support provide binary compatibility across major
releases e.g. applications using org.apache.hadop.mapred.* APIs are releases e.g. applications using org.apache.hadoop.mapred.
supported compatibly across hadoop-1.x and hadoop-2.x. See
{{{../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html} * APIs are supported compatibly across hadoop-1.x and hadoop-2.x. See
{{{../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html}
Compatibility for MapReduce applications between hadoop-1.x and hadoop-2.x}} Compatibility for MapReduce applications between hadoop-1.x and hadoop-2.x}}
for more details. for more details.
@ -248,13 +249,13 @@ hand-in-hand to address this.
* {{{../hadoop-hdfs/WebHDFS.html}WebHDFS}} - Stable * {{{../hadoop-hdfs/WebHDFS.html}WebHDFS}} - Stable
* {{{../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html}ResourceManager}} * {{{../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html}ResourceManager}}
* {{{../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html}NodeManager}} * {{{../../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html}NodeManager}}
* {{{../hadoop-yarn/hadoop-yarn-site/MapredAppMasterRest.html}MR Application Master}} * {{{../../hadoop-yarn/hadoop-yarn-site/MapredAppMasterRest.html}MR Application Master}}
* {{{../hadoop-yarn/hadoop-yarn-site/HistoryServerRest.html}History Server}} * {{{../../hadoop-yarn/hadoop-yarn-site/HistoryServerRest.html}History Server}}
*** Policy *** Policy
@ -512,7 +513,8 @@ hand-in-hand to address this.
{{{https://issues.apache.org/jira/browse/HADOOP-9517}HADOOP-9517}} {{{https://issues.apache.org/jira/browse/HADOOP-9517}HADOOP-9517}}
* Binary compatibility for MapReduce end-user applications between hadoop-1.x and hadoop-2.x - * Binary compatibility for MapReduce end-user applications between hadoop-1.x and hadoop-2.x -
{{{../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html}MapReduce Compatibility between hadoop-1.x and hadoop-2.x}} {{{../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html}
MapReduce Compatibility between hadoop-1.x and hadoop-2.x}}
* Annotations for interfaces as per interface classification * Annotations for interfaces as per interface classification
schedule - schedule -

View File

@ -88,7 +88,7 @@ chgrp
Change group association of files. The user must be the owner of files, or Change group association of files. The user must be the owner of files, or
else a super-user. Additional information is in the else a super-user. Additional information is in the
{{{betterurl}Permissions Guide}}. {{{../hadoop-hdfs/HdfsPermissionsGuide.html}Permissions Guide}}.
Options Options
@ -101,7 +101,7 @@ chmod
Change the permissions of files. With -R, make the change recursively Change the permissions of files. With -R, make the change recursively
through the directory structure. The user must be the owner of the file, or through the directory structure. The user must be the owner of the file, or
else a super-user. Additional information is in the else a super-user. Additional information is in the
{{{betterurl}Permissions Guide}}. {{{../hadoop-hdfs/HdfsPermissionsGuide.html}Permissions Guide}}.
Options Options
@ -112,7 +112,7 @@ chown
Usage: <<<hdfs dfs -chown [-R] [OWNER][:[GROUP]] URI [URI ]>>> Usage: <<<hdfs dfs -chown [-R] [OWNER][:[GROUP]] URI [URI ]>>>
Change the owner of files. The user must be a super-user. Additional information Change the owner of files. The user must be a super-user. Additional information
is in the {{{betterurl}Permissions Guide}}. is in the {{{../hadoop-hdfs/HdfsPermissionsGuide.html}Permissions Guide}}.
Options Options
@ -210,8 +210,8 @@ expunge
Usage: <<<hdfs dfs -expunge>>> Usage: <<<hdfs dfs -expunge>>>
Empty the Trash. Refer to the {{{betterurl}HDFS Architecture Guide}} for Empty the Trash. Refer to the {{{../hadoop-hdfs/HdfsDesign.html}
more information on the Trash feature. HDFS Architecture Guide}} for more information on the Trash feature.
get get
@ -439,7 +439,9 @@ test
Options: Options:
* The -e option will check to see if the file exists, returning 0 if true. * The -e option will check to see if the file exists, returning 0 if true.
* The -z option will check to see if the file is zero length, returning 0 if true. * The -z option will check to see if the file is zero length, returning 0 if true.
* The -d option will check to see if the path is directory, returning 0 if true. * The -d option will check to see if the path is directory, returning 0 if true.
Example: Example:

View File

@ -18,8 +18,6 @@
Hadoop Interface Taxonomy: Audience and Stability Classification Hadoop Interface Taxonomy: Audience and Stability Classification
\[ {{{./index.html}Go Back}} \]
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
* Motivation * Motivation

View File

@ -117,23 +117,19 @@ Native Libraries Guide
* zlib-development package (stable version >= 1.2.0) * zlib-development package (stable version >= 1.2.0)
Once you installed the prerequisite packages use the standard hadoop Once you installed the prerequisite packages use the standard hadoop
build.xml file and pass along the compile.native flag (set to true) to pom.xml file and pass along the native flag to build the native hadoop
build the native hadoop library: library:
---- ----
$ ant -Dcompile.native=true <target> $ mvn package -Pdist,native -DskipTests -Dtar
---- ----
You should see the newly-built library in: You should see the newly-built library in:
---- ----
$ build/native/<platform>/lib $ hadoop-dist/target/hadoop-${project.version}/lib/native
---- ----
where <platform> is a combination of the system-properties:
${os.name}-${os.arch}-${sun.arch.data.model} (for example,
Linux-i386-32).
Please note the following: Please note the following:
* It is mandatory to install both the zlib and gzip development * It is mandatory to install both the zlib and gzip development

View File

@ -0,0 +1,637 @@
~~ Licensed under the Apache License, Version 2.0 (the "License");
~~ you may not use this file except in compliance with the License.
~~ You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License. See accompanying LICENSE file.
---
Hadoop in Secure Mode
---
---
${maven.build.timestamp}
%{toc|section=0|fromDepth=0|toDepth=3}
Hadoop in Secure Mode
* Introduction
This document describes how to configure authentication for Hadoop in
secure mode.
By default Hadoop runs in non-secure mode in which no actual
authentication is required.
By configuring Hadoop to run in secure mode,
each user and service needs to be authenticated by Kerberos
in order to use Hadoop services.
Security features of Hadoop consist of
{{{Authentication}authentication}},
{{{./ServiceLevelAuth.html}service level authorization}},
{{{./HttpAuthentication.html}authentication for Web consoles}}
and {{{Data confidentiality}data confidentiality}}.
* Authentication
** End User Accounts
When service level authentication is turned on,
end users using Hadoop in secure mode need to be authenticated by Kerberos.
The simplest way to do authentication is using the <<<kinit>>> command of Kerberos.
** User Accounts for Hadoop Daemons
Ensure that HDFS and YARN daemons run as different Unix users,
e.g. <<<hdfs>>> and <<<yarn>>>.
Also, ensure that the MapReduce JobHistory server runs as
a different user such as <<<mapred>>>.
It's recommended to have them share a Unix group, e.g. <<<hadoop>>>.
See also "{{Mapping from user to group}}" for group management.
*---------------+----------------------------------------------------------------------+
|| User:Group || Daemons |
*---------------+----------------------------------------------------------------------+
| hdfs:hadoop | NameNode, Secondary NameNode, JournalNode, DataNode |
*---------------+----------------------------------------------------------------------+
| yarn:hadoop | ResourceManager, NodeManager |
*---------------+----------------------------------------------------------------------+
| mapred:hadoop | MapReduce JobHistory Server |
*---------------+----------------------------------------------------------------------+
** Kerberos principals for Hadoop Daemons and Users
For running hadoop service daemons in Hadoop in secure mode,
Kerberos principals are required.
Each service reads authentication information saved in a keytab file with appropriate permissions.
HTTP web-consoles should be served by a principal different from the one used for RPC.
The subsections below show examples of credentials for Hadoop services.
*** HDFS
The NameNode keytab file, on the NameNode host, should look like the
following:
----
$ klist -e -k -t /etc/security/keytab/nn.service.keytab
Keytab name: FILE:/etc/security/keytab/nn.service.keytab
KVNO Timestamp Principal
4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
The Secondary NameNode keytab file, on that host, should look like the
following:
----
$ klist -e -k -t /etc/security/keytab/sn.service.keytab
Keytab name: FILE:/etc/security/keytab/sn.service.keytab
KVNO Timestamp Principal
4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
The DataNode keytab file, on each host, should look like the following:
----
$ klist -e -k -t /etc/security/keytab/dn.service.keytab
Keytab name: FILE:/etc/security/keytab/dn.service.keytab
KVNO Timestamp Principal
4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
*** YARN
The ResourceManager keytab file, on the ResourceManager host, should look
like the following:
----
$ klist -e -k -t /etc/security/keytab/rm.service.keytab
Keytab name: FILE:/etc/security/keytab/rm.service.keytab
KVNO Timestamp Principal
4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
The NodeManager keytab file, on each host, should look like the following:
----
$ klist -e -k -t /etc/security/keytab/nm.service.keytab
Keytab name: FILE:/etc/security/keytab/nm.service.keytab
KVNO Timestamp Principal
4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
*** MapReduce JobHistory Server
The MapReduce JobHistory Server keytab file, on that host, should look
like the following:
----
$ klist -e -k -t /etc/security/keytab/jhs.service.keytab
Keytab name: FILE:/etc/security/keytab/jhs.service.keytab
KVNO Timestamp Principal
4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
----
** Mapping from Kerberos principal to OS user account
Hadoop maps Kerberos principal to OS user account using
the rule specified by <<<hadoop.security.auth_to_local>>>
which works in the same way as the <<<auth_to_local>>> in
{{{http://web.mit.edu/Kerberos/krb5-latest/doc/admin/conf_files/krb5_conf.html}Kerberos configuration file (krb5.conf)}}.
By default, it picks the first component of principal name as a user name
if the realm matches the <<<default_realm>>> (usually defined in /etc/krb5.conf).
For example, <<<host/full.qualified.domain.name@REALM.TLD>>> is mapped to <<<host>>>
by the default rule.
** Mapping from user to group
Though files on HDFS are associated with an owner and a group,
Hadoop does not have the definition of group by itself.
Mapping from user to group is done by the OS or LDAP.
You can change the way of mapping by
specifying the name of the mapping provider as the value of
<<<hadoop.security.group.mapping>>>.
See {{{../hadoop-hdfs/HdfsPermissionsGuide.html}HDFS Permissions Guide}} for details.
In practice, you need to manage an SSO environment using Kerberos with LDAP
for Hadoop in secure mode.
** Proxy user
Some products such as Apache Oozie which access the services of Hadoop
on behalf of end users need to be able to impersonate end users.
You can configure proxy user using properties
<<<hadoop.proxyuser.${superuser}.hosts>>> and <<<hadoop.proxyuser.${superuser}.groups>>>.
For example, by specifying as below in core-site.xml,
user named <<<oozie>>> accessing from any host
can impersonate any user belonging to any group.
----
<property>
<name>hadoop.proxyuser.oozie.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.groups</name>
<value>*</value>
</property>
----
** Secure DataNode
Because the data transfer protocol of DataNode
does not use the RPC framework of Hadoop,
DataNode must authenticate itself by
using privileged ports which are specified by
<<<dfs.datanode.address>>> and <<<dfs.datanode.http.address>>>.
This authentication is based on the assumption
that the attacker won't be able to get root privileges.
When you execute <<<hdfs datanode>>> command as root,
server process binds privileged port at first,
then drops privilege and runs as the user account specified by
<<<HADOOP_SECURE_DN_USER>>>.
This startup process uses jsvc installed to <<<JSVC_HOME>>>.
You must specify <<<HADOOP_SECURE_DN_USER>>> and <<<JSVC_HOME>>>
as environment variables on start up (in hadoop-env.sh).
* Data confidentiality
** Data Encryption on RPC
The data transferred between hadoop services and clients can be encrypted on the wire.
Setting <<<hadoop.rpc.protection>>> to <<<"privacy">>> in the core-site.xml
activates data encryption.
** Data Encryption on Block data transfer.
You need to set <<<dfs.encrypt.data.transfer>>> to <<<"true">>> in the hdfs-site.xml
in order to activate data encryption for data transfer protocol of DataNode.
** Data Encryption on HTTP
Data transfer between Web-console and clients is protected by using SSL(HTTPS).
* Configuration
** Permissions for both HDFS and local fileSystem paths
The following table lists various paths on HDFS and local filesystems (on
all nodes) and recommended permissions:
*-------------------+-------------------+------------------+------------------+
|| Filesystem || Path || User:Group || Permissions |
*-------------------+-------------------+------------------+------------------+
| local | <<<dfs.namenode.name.dir>>> | hdfs:hadoop | drwx------ |
*-------------------+-------------------+------------------+------------------+
| local | <<<dfs.datanode.data.dir>>> | hdfs:hadoop | drwx------ |
*-------------------+-------------------+------------------+------------------+
| local | $HADOOP_LOG_DIR | hdfs:hadoop | drwxrwxr-x |
*-------------------+-------------------+------------------+------------------+
| local | $YARN_LOG_DIR | yarn:hadoop | drwxrwxr-x |
*-------------------+-------------------+------------------+------------------+
| local | <<<yarn.nodemanager.local-dirs>>> | yarn:hadoop | drwxr-xr-x |
*-------------------+-------------------+------------------+------------------+
| local | <<<yarn.nodemanager.log-dirs>>> | yarn:hadoop | drwxr-xr-x |
*-------------------+-------------------+------------------+------------------+
| local | container-executor | root:hadoop | --Sr-s--- |
*-------------------+-------------------+------------------+------------------+
| local | <<<conf/container-executor.cfg>>> | root:hadoop | r-------- |
*-------------------+-------------------+------------------+------------------+
| hdfs | / | hdfs:hadoop | drwxr-xr-x |
*-------------------+-------------------+------------------+------------------+
| hdfs | /tmp | hdfs:hadoop | drwxrwxrwxt |
*-------------------+-------------------+------------------+------------------+
| hdfs | /user | hdfs:hadoop | drwxr-xr-x |
*-------------------+-------------------+------------------+------------------+
| hdfs | <<<yarn.nodemanager.remote-app-log-dir>>> | yarn:hadoop | drwxrwxrwxt |
*-------------------+-------------------+------------------+------------------+
| hdfs | <<<mapreduce.jobhistory.intermediate-done-dir>>> | mapred:hadoop | |
| | | | drwxrwxrwxt |
*-------------------+-------------------+------------------+------------------+
| hdfs | <<<mapreduce.jobhistory.done-dir>>> | mapred:hadoop | |
| | | | drwxr-x--- |
*-------------------+-------------------+------------------+------------------+
** Common Configurations
In order to turn on RPC authentication in hadoop,
set the value of <<<hadoop.security.authentication>>> property to
<<<"kerberos">>>, and set security related settings listed below appropriately.
The following properties should be in the <<<core-site.xml>>> of all the
nodes in the cluster.
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<hadoop.security.authentication>>> | <kerberos> | |
| | | <<<simple>>> : No authentication. (default) \
| | | <<<kerberos>>> : Enable authentication by Kerberos. |
*-------------------------+-------------------------+------------------------+
| <<<hadoop.security.authorization>>> | <true> | |
| | | Enable {{{./ServiceLevelAuth.html}RPC service-level authorization}}. |
*-------------------------+-------------------------+------------------------+
| <<<hadoop.rpc.protection>>> | <authentication> |
| | | <authentication> : authentication only (default) \
| | | <integrity> : integrity check in addition to authentication \
| | | <privacy> : data encryption in addition to integrity |
*-------------------------+-------------------------+------------------------+
| <<<hadoop.security.auth_to_local>>> | | |
| | <<<RULE:>>><exp1>\
| | <<<RULE:>>><exp2>\
| | <...>\
| | DEFAULT |
| | | The value is string containing new line characters.
| | | See
| | | {{{http://web.mit.edu/Kerberos/krb5-latest/doc/admin/conf_files/krb5_conf.html}Kerberos documentation}}
| | | for format for <exp>.
*-------------------------+-------------------------+------------------------+
| <<<hadoop.proxyuser.>>><superuser><<<.hosts>>> | | |
| | | comma separated hosts from which <superuser> is allowed to impersonate users. |
| | | <<<*>>> means wildcard. |
*-------------------------+-------------------------+------------------------+
| <<<hadoop.proxyuser.>>><superuser><<<.groups>>> | | |
| | | comma separated groups to which users impersonated by <superuser> belong. |
| | | <<<*>>> means wildcard. |
*-------------------------+-------------------------+------------------------+
Configuration for <<<conf/core-site.xml>>>
** NameNode
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<dfs.block.access.token.enable>>> | <true> | |
| | | Enable HDFS block access tokens for secure operations. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.https.enable>>> | <true> | |
| | | This value is deprecated. Use dfs.http.policy |
*-------------------------+-------------------------+------------------------+
| <<<dfs.http.policy>>> | <HTTP_ONLY> or <HTTPS_ONLY> or <HTTP_AND_HTTPS> | |
| | | HTTPS_ONLY turns off http access |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.https-address>>> | <nn_host_fqdn:50470> | |
*-------------------------+-------------------------+------------------------+
| <<<dfs.https.port>>> | <50470> | |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.keytab.file>>> | </etc/security/keytab/nn.service.keytab> | |
| | | Kerberos keytab file for the NameNode. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.kerberos.principal>>> | nn/_HOST@REALM.TLD | |
| | | Kerberos principal name for the NameNode. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.kerberos.https.principal>>> | host/_HOST@REALM.TLD | |
| | | HTTPS Kerberos principal name for the NameNode. |
*-------------------------+-------------------------+------------------------+
Configuration for <<<conf/hdfs-site.xml>>>
** Secondary NameNode
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.secondary.http-address>>> | <c_nn_host_fqdn:50090> | |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.secondary.https-port>>> | <50470> | |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.secondary.keytab.file>>> | | |
| | </etc/security/keytab/sn.service.keytab> | |
| | | Kerberos keytab file for the Secondary NameNode. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.secondary.kerberos.principal>>> | sn/_HOST@REALM.TLD | |
| | | Kerberos principal name for the Secondary NameNode. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.namenode.secondary.kerberos.https.principal>>> | | |
| | host/_HOST@REALM.TLD | |
| | | HTTPS Kerberos principal name for the Secondary NameNode. |
*-------------------------+-------------------------+------------------------+
Configuration for <<<conf/hdfs-site.xml>>>
** DataNode
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<dfs.datanode.data.dir.perm>>> | 700 | |
*-------------------------+-------------------------+------------------------+
| <<<dfs.datanode.address>>> | <0.0.0.0:1004> | |
| | | Secure DataNode must use privileged port |
| | | in order to assure that the server was started securely. |
| | | This means that the server must be started via jsvc. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.datanode.http.address>>> | <0.0.0.0:1006> | |
| | | Secure DataNode must use privileged port |
| | | in order to assure that the server was started securely. |
| | | This means that the server must be started via jsvc. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.datanode.https.address>>> | <0.0.0.0:50470> | |
*-------------------------+-------------------------+------------------------+
| <<<dfs.datanode.keytab.file>>> | </etc/security/keytab/dn.service.keytab> | |
| | | Kerberos keytab file for the DataNode. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.datanode.kerberos.principal>>> | dn/_HOST@REALM.TLD | |
| | | Kerberos principal name for the DataNode. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.datanode.kerberos.https.principal>>> | | |
| | host/_HOST@REALM.TLD | |
| | | HTTPS Kerberos principal name for the DataNode. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.encrypt.data.transfer>>> | <false> | |
| | | set to <<<true>>> when using data encryption |
*-------------------------+-------------------------+------------------------+
Configuration for <<<conf/hdfs-site.xml>>>
** WebHDFS
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<dfs.webhdfs.enabled>>> | <true> | |
| | | Enable security on WebHDFS. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.web.authentication.kerberos.principal>>> | http/_HOST@REALM.TLD | |
| | | Kerberos principal name for the WebHDFS. |
*-------------------------+-------------------------+------------------------+
| <<<dfs.web.authentication.kerberos.keytab>>> | </etc/security/keytab/http.service.keytab> | |
| | | Kerberos keytab file for WebHDFS. |
*-------------------------+-------------------------+------------------------+
Configuration for <<<conf/hdfs-site.xml>>>
** ResourceManager
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<yarn.resourcemanager.keytab>>> | | |
| | </etc/security/keytab/rm.service.keytab> | |
| | | Kerberos keytab file for the ResourceManager. |
*-------------------------+-------------------------+------------------------+
| <<<yarn.resourcemanager.principal>>> | rm/_HOST@REALM.TLD | |
| | | Kerberos principal name for the ResourceManager. |
*-------------------------+-------------------------+------------------------+
Configuration for <<<conf/yarn-site.xml>>>
** NodeManager
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<yarn.nodemanager.keytab>>> | </etc/security/keytab/nm.service.keytab> | |
| | | Kerberos keytab file for the NodeManager. |
*-------------------------+-------------------------+------------------------+
| <<<yarn.nodemanager.principal>>> | nm/_HOST@REALM.TLD | |
| | | Kerberos principal name for the NodeManager. |
*-------------------------+-------------------------+------------------------+
| <<<yarn.nodemanager.container-executor.class>>> | | |
| | <<<org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor>>> |
| | | Use LinuxContainerExecutor. |
*-------------------------+-------------------------+------------------------+
| <<<yarn.nodemanager.linux-container-executor.group>>> | <hadoop> | |
| | | Unix group of the NodeManager. |
*-------------------------+-------------------------+------------------------+
| <<<yarn.nodemanager.linux-container-executor.path>>> | </path/to/bin/container-executor> | |
| | | The path to the executable of Linux container executor. |
*-------------------------+-------------------------+------------------------+
Configuration for <<<conf/yarn-site.xml>>>
** Configuration for WebAppProxy
The <<<WebAppProxy>>> provides a proxy between the web applications
exported by an application and an end user. If security is enabled
it will warn users before accessing a potentially unsafe web application.
Authentication and authorization using the proxy is handled just like
any other privileged web application.
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<yarn.web-proxy.address>>> | | |
| | <<<WebAppProxy>>> host:port for proxy to AM web apps. | |
| | | <host:port> if this is the same as <<<yarn.resourcemanager.webapp.address>>>|
| | | or it is not defined then the <<<ResourceManager>>> will run the proxy|
| | | otherwise a standalone proxy server will need to be launched.|
*-------------------------+-------------------------+------------------------+
| <<<yarn.web-proxy.keytab>>> | | |
| | </etc/security/keytab/web-app.service.keytab> | |
| | | Kerberos keytab file for the WebAppProxy. |
*-------------------------+-------------------------+------------------------+
| <<<yarn.web-proxy.principal>>> | wap/_HOST@REALM.TLD | |
| | | Kerberos principal name for the WebAppProxy. |
*-------------------------+-------------------------+------------------------+
Configuration for <<<conf/yarn-site.xml>>>
** LinuxContainerExecutor
A <<<ContainerExecutor>>> is used by the YARN framework to define how any
<container> is launched and controlled.
The following are available in Hadoop YARN:
*--------------------------------------+--------------------------------------+
|| ContainerExecutor || Description |
*--------------------------------------+--------------------------------------+
| <<<DefaultContainerExecutor>>> | |
| | The default executor which YARN uses to manage container execution. |
| | The container process has the same Unix user as the NodeManager. |
*--------------------------------------+--------------------------------------+
| <<<LinuxContainerExecutor>>> | |
| | Supported only on GNU/Linux, this executor runs the containers as either the |
| | YARN user who submitted the application (when full security is enabled) or |
| | as a dedicated user (defaults to nobody) when full security is not enabled. |
| | When full security is enabled, this executor requires all user accounts to be |
| | created on the cluster nodes where the containers are launched. It uses |
| | a <setuid> executable that is included in the Hadoop distribution. |
| | The NodeManager uses this executable to launch and kill containers. |
| | The setuid executable switches to the user who has submitted the |
| | application and launches or kills the containers. For maximum security, |
| | this executor sets up restricted permissions and user/group ownership of |
| | local files and directories used by the containers such as the shared |
| | objects, jars, intermediate files, log files etc. Particularly note that, |
| | because of this, except the application owner and NodeManager, no other |
| | user can access any of the local files/directories including those |
| | localized as part of the distributed cache. |
*--------------------------------------+--------------------------------------+
To build the LinuxContainerExecutor executable run:
----
$ mvn package -Dcontainer-executor.conf.dir=/etc/hadoop/
----
The path passed in <<<-Dcontainer-executor.conf.dir>>> should be the
path on the cluster nodes where a configuration file for the setuid
executable should be located. The executable should be installed in
$HADOOP_YARN_HOME/bin.
The executable must have specific permissions: 6050 or --Sr-s---
permissions user-owned by <root> (super-user) and group-owned by a
special group (e.g. <<<hadoop>>>) of which the NodeManager Unix user is
the group member and no ordinary application user is. If any application
user belongs to this special group, security will be compromised. This
special group name should be specified for the configuration property
<<<yarn.nodemanager.linux-container-executor.group>>> in both
<<<conf/yarn-site.xml>>> and <<<conf/container-executor.cfg>>>.
For example, let's say that the NodeManager is run as user <yarn> who is
part of the groups users and <hadoop>, any of them being the primary group.
Let it also be that <users> has both <yarn> and another user
(application submitter) <alice> as its members, and <alice> does not
belong to <hadoop>. Going by the above description, the setuid/setgid
executable should be set 6050 or --Sr-s--- with user-owner as <yarn> and
group-owner as <hadoop> which has <yarn> as its member (and not <users>
which has <alice> also as its member besides <yarn>).
The LinuxContainerExecutor requires that paths including and leading up to
the directories specified in <<<yarn.nodemanager.local-dirs>>> and
<<<yarn.nodemanager.log-dirs>>> be set to 755 permissions as described
above in the table on permissions on directories.
* <<<conf/container-executor.cfg>>>
The executable requires a configuration file called
<<<container-executor.cfg>>> to be present in the configuration
directory passed to the mvn target mentioned above.
The configuration file must be owned by the user running NodeManager
(user <<<yarn>>> in the above example), group-owned by anyone and
should have the permissions 0400 or r--------.
The executable requires following configuration items to be present
in the <<<conf/container-executor.cfg>>> file. The items should be
mentioned as simple key=value pairs, one per-line:
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<yarn.nodemanager.linux-container-executor.group>>> | <hadoop> | |
| | | Unix group of the NodeManager. The group owner of the |
| | |<container-executor> binary should be this group. Should be same as the |
| | | value with which the NodeManager is configured. This configuration is |
| | | required for validating the secure access of the <container-executor> |
| | | binary. |
*-------------------------+-------------------------+------------------------+
| <<<banned.users>>> | hdfs,yarn,mapred,bin | Banned users. |
*-------------------------+-------------------------+------------------------+
| <<<allowed.system.users>>> | foo,bar | Allowed system users. |
*-------------------------+-------------------------+------------------------+
| <<<min.user.id>>> | 1000 | Prevent other super-users. |
*-------------------------+-------------------------+------------------------+
Configuration for <<<conf/container-executor.cfg>>>
To re-cap, here are the local file-system permissions required for the
various paths related to the <<<LinuxContainerExecutor>>>:
*-------------------+-------------------+------------------+------------------+
|| Filesystem || Path || User:Group || Permissions |
*-------------------+-------------------+------------------+------------------+
| local | container-executor | root:hadoop | --Sr-s--- |
*-------------------+-------------------+------------------+------------------+
| local | <<<conf/container-executor.cfg>>> | root:hadoop | r-------- |
*-------------------+-------------------+------------------+------------------+
| local | <<<yarn.nodemanager.local-dirs>>> | yarn:hadoop | drwxr-xr-x |
*-------------------+-------------------+------------------+------------------+
| local | <<<yarn.nodemanager.log-dirs>>> | yarn:hadoop | drwxr-xr-x |
*-------------------+-------------------+------------------+------------------+
** MapReduce JobHistory Server
*-------------------------+-------------------------+------------------------+
|| Parameter || Value || Notes |
*-------------------------+-------------------------+------------------------+
| <<<mapreduce.jobhistory.address>>> | | |
| | MapReduce JobHistory Server <host:port> | Default port is 10020. |
*-------------------------+-------------------------+------------------------+
| <<<mapreduce.jobhistory.keytab>>> | |
| | </etc/security/keytab/jhs.service.keytab> | |
| | | Kerberos keytab file for the MapReduce JobHistory Server. |
*-------------------------+-------------------------+------------------------+
| <<<mapreduce.jobhistory.principal>>> | jhs/_HOST@REALM.TLD | |
| | | Kerberos principal name for the MapReduce JobHistory Server. |
*-------------------------+-------------------------+------------------------+
Configuration for <<<conf/mapred-site.xml>>>

View File

@ -29,8 +29,10 @@ Service Level Authorization Guide
Make sure Hadoop is installed, configured and setup correctly. For more Make sure Hadoop is installed, configured and setup correctly. For more
information see: information see:
* Single Node Setup for first-time users.
* Cluster Setup for large, distributed clusters. * {{{./SingleCluster.html}Single Node Setup}} for first-time users.
* {{{./ClusterSetup.html}Cluster Setup}} for large, distributed clusters.
* Overview * Overview

View File

@ -18,8 +18,6 @@
Hadoop MapReduce Next Generation - Setting up a Single Node Cluster. Hadoop MapReduce Next Generation - Setting up a Single Node Cluster.
\[ {{{./index.html}Go Back}} \]
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
* Mapreduce Tarball * Mapreduce Tarball
@ -32,7 +30,8 @@ $ mvn clean install -DskipTests
$ cd hadoop-mapreduce-project $ cd hadoop-mapreduce-project
$ mvn clean install assembly:assembly -Pnative $ mvn clean install assembly:assembly -Pnative
+---+ +---+
<<NOTE:>> You will need protoc 2.5.0 installed. <<NOTE:>> You will need {{{http://code.google.com/p/protobuf}protoc 2.5.0}}
installed.
To ignore the native builds in mapreduce you can omit the <<<-Pnative>>> argument To ignore the native builds in mapreduce you can omit the <<<-Pnative>>> argument
for maven. The tarball should be available in <<<target/>>> directory. for maven. The tarball should be available in <<<target/>>> directory.

View File

@ -28,10 +28,30 @@ public class TestVersionUtil {
// Equal versions are equal. // Equal versions are equal.
assertEquals(0, VersionUtil.compareVersions("2.0.0", "2.0.0")); assertEquals(0, VersionUtil.compareVersions("2.0.0", "2.0.0"));
assertEquals(0, VersionUtil.compareVersions("2.0.0a", "2.0.0a")); assertEquals(0, VersionUtil.compareVersions("2.0.0a", "2.0.0a"));
assertEquals(0, VersionUtil.compareVersions("1", "1"));
assertEquals(0, VersionUtil.compareVersions( assertEquals(0, VersionUtil.compareVersions(
"2.0.0-SNAPSHOT", "2.0.0-SNAPSHOT")); "2.0.0-SNAPSHOT", "2.0.0-SNAPSHOT"));
assertEquals(0, VersionUtil.compareVersions("1", "1"));
assertEquals(0, VersionUtil.compareVersions("1", "1.0"));
assertEquals(0, VersionUtil.compareVersions("1", "1.0.0"));
assertEquals(0, VersionUtil.compareVersions("1.0", "1"));
assertEquals(0, VersionUtil.compareVersions("1.0", "1.0"));
assertEquals(0, VersionUtil.compareVersions("1.0", "1.0.0"));
assertEquals(0, VersionUtil.compareVersions("1.0.0", "1"));
assertEquals(0, VersionUtil.compareVersions("1.0.0", "1.0"));
assertEquals(0, VersionUtil.compareVersions("1.0.0", "1.0.0"));
assertEquals(0, VersionUtil.compareVersions("1.0.0-alpha-1", "1.0.0-a1"));
assertEquals(0, VersionUtil.compareVersions("1.0.0-alpha-2", "1.0.0-a2"));
assertEquals(0, VersionUtil.compareVersions("1.0.0-alpha1", "1.0.0-alpha-1"));
assertEquals(0, VersionUtil.compareVersions("1a0", "1.0.0-alpha-0"));
assertEquals(0, VersionUtil.compareVersions("1a0", "1-a0"));
assertEquals(0, VersionUtil.compareVersions("1.a0", "1-a0"));
assertEquals(0, VersionUtil.compareVersions("1.a0", "1.0.0-alpha-0"));
// Assert that lower versions are lower, and higher versions are higher. // Assert that lower versions are lower, and higher versions are higher.
assertExpectedValues("1", "2.0.0"); assertExpectedValues("1", "2.0.0");
assertExpectedValues("1.0.0", "2"); assertExpectedValues("1.0.0", "2");
@ -51,15 +71,27 @@ public class TestVersionUtil {
assertExpectedValues("1.0.2a", "1.0.2ab"); assertExpectedValues("1.0.2a", "1.0.2ab");
assertExpectedValues("1.0.0a1", "1.0.0a2"); assertExpectedValues("1.0.0a1", "1.0.0a2");
assertExpectedValues("1.0.0a2", "1.0.0a10"); assertExpectedValues("1.0.0a2", "1.0.0a10");
// The 'a' in "1.a" is not followed by digit, thus not treated as "alpha",
// and treated larger than "1.0", per maven's ComparableVersion class
// implementation.
assertExpectedValues("1.0", "1.a"); assertExpectedValues("1.0", "1.a");
assertExpectedValues("1.0", "1.a0"); //The 'a' in "1.a0" is followed by digit, thus treated as "alpha-<digit>"
assertExpectedValues("1.a0", "1.0");
assertExpectedValues("1a0", "1.0");
assertExpectedValues("1.0.1-alpha-1", "1.0.1-alpha-2");
assertExpectedValues("1.0.1-beta-1", "1.0.1-beta-2");
// Snapshot builds precede their eventual releases. // Snapshot builds precede their eventual releases.
assertExpectedValues("1.0-SNAPSHOT", "1.0"); assertExpectedValues("1.0-SNAPSHOT", "1.0");
assertExpectedValues("1.0", "1.0.0-SNAPSHOT"); assertExpectedValues("1.0.0-SNAPSHOT", "1.0");
assertExpectedValues("1.0.0-SNAPSHOT", "1.0.0"); assertExpectedValues("1.0.0-SNAPSHOT", "1.0.0");
assertExpectedValues("1.0.0", "1.0.1-SNAPSHOT"); assertExpectedValues("1.0.0", "1.0.1-SNAPSHOT");
assertExpectedValues("1.0.1-SNAPSHOT", "1.0.1"); assertExpectedValues("1.0.1-SNAPSHOT", "1.0.1");
assertExpectedValues("1.0.1-SNAPSHOT", "1.0.2");
assertExpectedValues("1.0.1-alpha-1", "1.0.1-SNAPSHOT");
assertExpectedValues("1.0.1-beta-1", "1.0.1-SNAPSHOT");
assertExpectedValues("1.0.1-beta-2", "1.0.1-SNAPSHOT");
} }
private static void assertExpectedValues(String lower, String higher) { private static void assertExpectedValues(String lower, String higher) {

View File

@ -504,6 +504,9 @@ Release 2.4.0 - UNRELEASED
HDFS-5788. listLocatedStatus response can be very large. (Nathan Roberts HDFS-5788. listLocatedStatus response can be very large. (Nathan Roberts
via kihwal) via kihwal)
HDFS-5781. Use an array to record the mapping between FSEditLogOpCode and
the corresponding byte value. (jing9)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn) HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)
@ -979,6 +982,9 @@ Release 2.3.0 - UNRELEASED
HDFS-5677. Need error checking for HA cluster configuration. HDFS-5677. Need error checking for HA cluster configuration.
(Vincent Sheffer via cos) (Vincent Sheffer via cos)
HDFS-5825. Use FileUtils.copyFile() to implement DFSTestUtils.copyFile().
(Haohui Mai via Arpit Agarwal)
OPTIMIZATIONS OPTIMIZATIONS
BUG FIXES BUG FIXES
@ -1142,6 +1148,15 @@ Release 2.3.0 - UNRELEASED
HDFS-5343. When cat command is issued on snapshot files getting unexpected result. HDFS-5343. When cat command is issued on snapshot files getting unexpected result.
(Sathish via umamahesh) (Sathish via umamahesh)
HDFS-5297. Fix dead links in HDFS site documents. (Akira Ajisaka via
Arpit Agarwal)
HDFS-5830. WebHdfsFileSystem.getFileBlockLocations throws
IllegalArgumentException when accessing another cluster. (Yongjun Zhang via
Colin Patrick McCabe)
HDFS-5833. Fix SecondaryNameNode javadoc. (Bangtao Zhou via Arpit Agarwal)
Release 2.2.0 - 2013-10-13 Release 2.2.0 - 2013-10-13
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -98,9 +98,8 @@ public class LocatedBlock {
} }
this.storageIDs = storageIDs; this.storageIDs = storageIDs;
this.storageTypes = storageTypes; this.storageTypes = storageTypes;
Preconditions.checkArgument(cachedLocs != null,
"cachedLocs should not be null, use a different constructor"); if (cachedLocs == null || cachedLocs.length == 0) {
if (cachedLocs.length == 0) {
this.cachedLocs = EMPTY_LOCS; this.cachedLocs = EMPTY_LOCS;
} else { } else {
this.cachedLocs = cachedLocs; this.cachedLocs = cachedLocs;

View File

@ -69,7 +69,7 @@ public enum FSEditLogOpCodes {
OP_MODIFY_CACHE_DIRECTIVE ((byte) 39), OP_MODIFY_CACHE_DIRECTIVE ((byte) 39),
OP_UPGRADE_MARKER ((byte) 40), OP_UPGRADE_MARKER ((byte) 40),
// Note that fromByte(..) depends on OP_INVALID being at the last position. // Note that the current range of the valid OP code is 0~127
OP_INVALID ((byte) -1); OP_INVALID ((byte) -1);
private final byte opCode; private final byte opCode;
@ -92,7 +92,22 @@ public enum FSEditLogOpCodes {
return opCode; return opCode;
} }
private static final FSEditLogOpCodes[] VALUES = FSEditLogOpCodes.values(); private static FSEditLogOpCodes[] VALUES;
static {
byte max = 0;
for (FSEditLogOpCodes code : FSEditLogOpCodes.values()) {
if (code.getOpCode() > max) {
max = code.getOpCode();
}
}
VALUES = new FSEditLogOpCodes[max + 1];
for (FSEditLogOpCodes code : FSEditLogOpCodes.values()) {
if (code.getOpCode() >= 0) {
VALUES[code.getOpCode()] = code;
}
}
}
/** /**
* Converts byte to FSEditLogOpCodes enum value * Converts byte to FSEditLogOpCodes enum value
@ -101,12 +116,9 @@ public enum FSEditLogOpCodes {
* @return enum with byte value of opCode * @return enum with byte value of opCode
*/ */
public static FSEditLogOpCodes fromByte(byte opCode) { public static FSEditLogOpCodes fromByte(byte opCode) {
if (opCode == -1) { if (opCode >= 0 && opCode < VALUES.length) {
return OP_INVALID;
}
if (opCode >= 0 && opCode < OP_INVALID.ordinal()) {
return VALUES[opCode]; return VALUES[opCode];
} }
return null; return opCode == -1 ? OP_INVALID : null;
} }
} }

View File

@ -90,7 +90,7 @@ import com.google.common.collect.ImmutableList;
* The Secondary NameNode is a daemon that periodically wakes * The Secondary NameNode is a daemon that periodically wakes
* up (determined by the schedule specified in the configuration), * up (determined by the schedule specified in the configuration),
* triggers a periodic checkpoint and then goes back to sleep. * triggers a periodic checkpoint and then goes back to sleep.
* The Secondary NameNode uses the ClientProtocol to talk to the * The Secondary NameNode uses the NamenodeProtocol to talk to the
* primary NameNode. * primary NameNode.
* *
**********************************************************/ **********************************************************/

View File

@ -19,8 +19,6 @@
HDFS Federation HDFS Federation
\[ {{{./index.html}Go Back}} \]
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
This guide provides an overview of the HDFS Federation feature and This guide provides an overview of the HDFS Federation feature and

View File

@ -18,8 +18,6 @@
HDFS High Availability HDFS High Availability
\[ {{{./index.html}Go Back}} \]
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
* {Purpose} * {Purpose}

View File

@ -18,8 +18,6 @@
HDFS High Availability Using the Quorum Journal Manager HDFS High Availability Using the Quorum Journal Manager
\[ {{{./index.html}Go Back}} \]
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
* {Purpose} * {Purpose}

View File

@ -20,8 +20,6 @@
Offline Edits Viewer Guide Offline Edits Viewer Guide
\[ {{{./index.html}Go Back}} \]
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
* Overview * Overview

View File

@ -18,8 +18,6 @@
Offline Image Viewer Guide Offline Image Viewer Guide
\[ {{{./index.html}Go Back}} \]
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
* Overview * Overview
@ -64,9 +62,9 @@ Offline Image Viewer Guide
but no data recorded. The default record delimiter is a tab, but but no data recorded. The default record delimiter is a tab, but
this may be changed via the -delimiter command line argument. This this may be changed via the -delimiter command line argument. This
processor is designed to create output that is easily analyzed by processor is designed to create output that is easily analyzed by
other tools, such as [36]Apache Pig. See the [37]Analyzing Results other tools, such as {{{http://pig.apache.org}Apache Pig}}. See
section for further information on using this processor to analyze the {{Analyzing Results}} section for further information on using
the contents of fsimage files. this processor to analyze the contents of fsimage files.
[[4]] XML creates an XML document of the fsimage and includes all of the [[4]] XML creates an XML document of the fsimage and includes all of the
information within the fsimage, similar to the lsr processor. The information within the fsimage, similar to the lsr processor. The

View File

@ -18,8 +18,6 @@
HDFS Permissions Guide HDFS Permissions Guide
\[ {{{./index.html}Go Back}} \]
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
* Overview * Overview
@ -55,8 +53,10 @@ HDFS Permissions Guide
* If the user name matches the owner of foo, then the owner * If the user name matches the owner of foo, then the owner
permissions are tested; permissions are tested;
* Else if the group of foo matches any member of the groups list, * Else if the group of foo matches any member of the groups list,
then the group permissions are tested; then the group permissions are tested;
* Otherwise the other permissions of foo are tested. * Otherwise the other permissions of foo are tested.
If a permissions check fails, the client operation fails. If a permissions check fails, the client operation fails.

View File

@ -18,8 +18,6 @@
HDFS Quotas Guide HDFS Quotas Guide
\[ {{{./index.html}Go Back}} \]
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
* Overview * Overview

View File

@ -108,9 +108,11 @@ HDFS Users Guide
The following documents describe how to install and set up a Hadoop The following documents describe how to install and set up a Hadoop
cluster: cluster:
* {{Single Node Setup}} for first-time users. * {{{../hadoop-common/SingleCluster.html}Single Node Setup}}
for first-time users.
* {{Cluster Setup}} for large, distributed clusters. * {{{../hadoop-common/ClusterSetup.html}Cluster Setup}}
for large, distributed clusters.
The rest of this document assumes the user is able to set up and run a The rest of this document assumes the user is able to set up and run a
HDFS with at least one DataNode. For the purpose of this document, both HDFS with at least one DataNode. For the purpose of this document, both
@ -136,7 +138,8 @@ HDFS Users Guide
for a command. These commands support most of the normal file system for a command. These commands support most of the normal file system
operations like copying files, changing file permissions, etc. It also operations like copying files, changing file permissions, etc. It also
supports a few HDFS specific operations like changing replication of supports a few HDFS specific operations like changing replication of
files. For more information see {{{File System Shell Guide}}}. files. For more information see {{{../hadoop-common/FileSystemShell.html}
File System Shell Guide}}.
** DFSAdmin Command ** DFSAdmin Command
@ -169,7 +172,7 @@ HDFS Users Guide
of racks and datanodes attached to the racks as viewed by the of racks and datanodes attached to the racks as viewed by the
NameNode. NameNode.
For command usage, see {{{dfsadmin}}}. For command usage, see {{{../hadoop-common/CommandsManual.html#dfsadmin}dfsadmin}}.
* Secondary NameNode * Secondary NameNode
@ -203,7 +206,8 @@ HDFS Users Guide
So that the check pointed image is always ready to be read by the So that the check pointed image is always ready to be read by the
primary NameNode if necessary. primary NameNode if necessary.
For command usage, see {{{secondarynamenode}}}. For command usage,
see {{{../hadoop-common/CommandsManual.html#secondarynamenode}secondarynamenode}}.
* Checkpoint Node * Checkpoint Node
@ -245,7 +249,7 @@ HDFS Users Guide
Multiple checkpoint nodes may be specified in the cluster configuration Multiple checkpoint nodes may be specified in the cluster configuration
file. file.
For command usage, see {{{namenode}}}. For command usage, see {{{../hadoop-common/CommandsManual.html#namenode}namenode}}.
* Backup Node * Backup Node
@ -287,7 +291,7 @@ HDFS Users Guide
For a complete discussion of the motivation behind the creation of the For a complete discussion of the motivation behind the creation of the
Backup node and Checkpoint node, see {{{https://issues.apache.org/jira/browse/HADOOP-4539}HADOOP-4539}}. Backup node and Checkpoint node, see {{{https://issues.apache.org/jira/browse/HADOOP-4539}HADOOP-4539}}.
For command usage, see {{{namenode}}}. For command usage, see {{{../hadoop-common/CommandsManual.html#namenode}namenode}}.
* Import Checkpoint * Import Checkpoint
@ -310,7 +314,7 @@ HDFS Users Guide
verifies that the image in <<<dfs.namenode.checkpoint.dir>>> is consistent, verifies that the image in <<<dfs.namenode.checkpoint.dir>>> is consistent,
but does not modify it in any way. but does not modify it in any way.
For command usage, see {{{namenode}}}. For command usage, see {{{../hadoop-common/CommandsManual.html#namenode}namenode}}.
* Rebalancer * Rebalancer
@ -337,7 +341,7 @@ HDFS Users Guide
A brief administrator's guide for rebalancer as a PDF is attached to A brief administrator's guide for rebalancer as a PDF is attached to
{{{https://issues.apache.org/jira/browse/HADOOP-1652}HADOOP-1652}}. {{{https://issues.apache.org/jira/browse/HADOOP-1652}HADOOP-1652}}.
For command usage, see {{{balancer}}}. For command usage, see {{{../hadoop-common/CommandsManual.html#balancer}balancer}}.
* Rack Awareness * Rack Awareness
@ -379,8 +383,9 @@ HDFS Users Guide
most of the recoverable failures. By default fsck ignores open files most of the recoverable failures. By default fsck ignores open files
but provides an option to select all files during reporting. The HDFS but provides an option to select all files during reporting. The HDFS
fsck command is not a Hadoop shell command. It can be run as fsck command is not a Hadoop shell command. It can be run as
<<<bin/hadoop fsck>>>. For command usage, see {{{fsck}}}. fsck can be run on the <<<bin/hadoop fsck>>>. For command usage, see
whole file system or on a subset of files. {{{../hadoop-common/CommandsManual.html#fsck}fsck}}. fsck can be run on
the whole file system or on a subset of files.
* fetchdt * fetchdt
@ -393,7 +398,8 @@ HDFS Users Guide
command. It can be run as <<<bin/hadoop fetchdt DTfile>>>. After you get command. It can be run as <<<bin/hadoop fetchdt DTfile>>>. After you get
the token you can run an HDFS command without having Kerberos tickets, the token you can run an HDFS command without having Kerberos tickets,
by pointing <<<HADOOP_TOKEN_FILE_LOCATION>>> environmental variable to the by pointing <<<HADOOP_TOKEN_FILE_LOCATION>>> environmental variable to the
delegation token file. For command usage, see {{{fetchdt}}} command. delegation token file. For command usage, see
{{{../hadoop-common/CommandsManual.html#fetchdt}fetchdt}} command.
* Recovery Mode * Recovery Mode
@ -427,10 +433,11 @@ HDFS Users Guide
let alone to restart HDFS from scratch. HDFS allows administrators to let alone to restart HDFS from scratch. HDFS allows administrators to
go back to earlier version of Hadoop and rollback the cluster to the go back to earlier version of Hadoop and rollback the cluster to the
state it was in before the upgrade. HDFS upgrade is described in more state it was in before the upgrade. HDFS upgrade is described in more
detail in {{{Hadoop Upgrade}}} Wiki page. HDFS can have one such backup at a detail in {{{http://wiki.apache.org/hadoop/Hadoop_Upgrade}Hadoop Upgrade}}
time. Before upgrading, administrators need to remove existing backup Wiki page. HDFS can have one such backup at a time. Before upgrading,
using bin/hadoop dfsadmin <<<-finalizeUpgrade>>> command. The following administrators need to remove existing backup using bin/hadoop dfsadmin
briefly describes the typical upgrade procedure: <<<-finalizeUpgrade>>> command. The following briefly describes the
typical upgrade procedure:
* Before upgrading Hadoop software, finalize if there is an existing * Before upgrading Hadoop software, finalize if there is an existing
backup. <<<dfsadmin -upgradeProgress>>> status can tell if the cluster backup. <<<dfsadmin -upgradeProgress>>> status can tell if the cluster
@ -450,7 +457,7 @@ HDFS Users Guide
* stop the cluster and distribute earlier version of Hadoop. * stop the cluster and distribute earlier version of Hadoop.
* start the cluster with rollback option. (<<<bin/start-dfs.h -rollback>>>). * start the cluster with rollback option. (<<<bin/start-dfs.sh -rollback>>>).
* File Permissions and Security * File Permissions and Security
@ -465,14 +472,15 @@ HDFS Users Guide
* Scalability * Scalability
Hadoop currently runs on clusters with thousands of nodes. The Hadoop currently runs on clusters with thousands of nodes. The
{{{PoweredBy}}} Wiki page lists some of the organizations that deploy Hadoop {{{http://wiki.apache.org/hadoop/PoweredBy}PoweredBy}} Wiki page lists
on large clusters. HDFS has one NameNode for each cluster. Currently some of the organizations that deploy Hadoop on large clusters.
the total memory available on NameNode is the primary scalability HDFS has one NameNode for each cluster. Currently the total memory
limitation. On very large clusters, increasing average size of files available on NameNode is the primary scalability limitation.
stored in HDFS helps with increasing cluster size without increasing On very large clusters, increasing average size of files stored in
memory requirements on NameNode. The default configuration may not HDFS helps with increasing cluster size without increasing memory
suite very large clustes. The {{{FAQ}}} Wiki page lists suggested requirements on NameNode. The default configuration may not suit
configuration improvements for large Hadoop clusters. very large clusters. The {{{http://wiki.apache.org/hadoop/FAQ}FAQ}}
Wiki page lists suggested configuration improvements for large Hadoop clusters.
* Related Documentation * Related Documentation
@ -481,19 +489,22 @@ HDFS Users Guide
documentation about Hadoop and HDFS. The following list is a starting documentation about Hadoop and HDFS. The following list is a starting
point for further exploration: point for further exploration:
* {{{Hadoop Site}}}: The home page for the Apache Hadoop site. * {{{http://hadoop.apache.org}Hadoop Site}}: The home page for
the Apache Hadoop site.
* {{{Hadoop Wiki}}}: The home page (FrontPage) for the Hadoop Wiki. Unlike * {{{http://wiki.apache.org/hadoop/FrontPage}Hadoop Wiki}}:
The home page (FrontPage) for the Hadoop Wiki. Unlike
the released documentation, which is part of Hadoop source tree, the released documentation, which is part of Hadoop source tree,
Hadoop Wiki is regularly edited by Hadoop Community. Hadoop Wiki is regularly edited by Hadoop Community.
* {{{FAQ}}}: The FAQ Wiki page. * {{{http://wiki.apache.org/hadoop/FAQ}FAQ}}: The FAQ Wiki page.
* {{{Hadoop JavaDoc API}}}. * {{{../../api/index.html}Hadoop JavaDoc API}}.
* {{{Hadoop User Mailing List}}}: core-user[at]hadoop.apache.org. * Hadoop User Mailing List: user[at]hadoop.apache.org.
* Explore {{{src/hdfs/hdfs-default.xml}}}. It includes brief description of * Explore {{{./hdfs-default.xml}hdfs-default.xml}}. It includes
most of the configuration variables available. brief description of most of the configuration variables available.
* {{{Hadoop Commands Guide}}}: Hadoop commands usage. * {{{../hadoop-common/CommandsManual.html}Hadoop Commands Guide}}:
Hadoop commands usage.

View File

@ -18,8 +18,6 @@
HFTP Guide HFTP Guide
\[ {{{./index.html}Go Back}} \]
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
* Introduction * Introduction

View File

@ -19,8 +19,6 @@
HDFS Short-Circuit Local Reads HDFS Short-Circuit Local Reads
\[ {{{./index.html}Go Back}} \]
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
* {Background} * {Background}

View File

@ -18,8 +18,6 @@
WebHDFS REST API WebHDFS REST API
\[ {{{./index.html}Go Back}} \]
%{toc|section=1|fromDepth=0} %{toc|section=1|fromDepth=0}
* {Document Conventions} * {Document Conventions}
@ -54,7 +52,7 @@ WebHDFS REST API
* {{{Status of a File/Directory}<<<GETFILESTATUS>>>}} * {{{Status of a File/Directory}<<<GETFILESTATUS>>>}}
(see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.getFileStatus) (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.getFileStatus)
* {{<<<LISTSTATUS>>>}} * {{{List a Directory}<<<LISTSTATUS>>>}}
(see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.listStatus) (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.listStatus)
* {{{Get Content Summary of a Directory}<<<GETCONTENTSUMMARY>>>}} * {{{Get Content Summary of a Directory}<<<GETCONTENTSUMMARY>>>}}
@ -109,7 +107,7 @@ WebHDFS REST API
* {{{Append to a File}<<<APPEND>>>}} * {{{Append to a File}<<<APPEND>>>}}
(see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.append) (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.append)
* {{{Concatenate Files}<<<CONCAT>>>}} * {{{Concat File(s)}<<<CONCAT>>>}}
(see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.concat) (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.concat)
* HTTP DELETE * HTTP DELETE
@ -871,7 +869,7 @@ Content-Length: 0
* {Error Responses} * {Error Responses}
When an operation fails, the server may throw an exception. When an operation fails, the server may throw an exception.
The JSON schema of error responses is defined in {{<<<RemoteException>>> JSON schema}}. The JSON schema of error responses is defined in {{{RemoteException JSON Schema}}}.
The table below shows the mapping from exceptions to HTTP response codes. The table below shows the mapping from exceptions to HTTP response codes.
** {HTTP Response Codes} ** {HTTP Response Codes}
@ -1119,7 +1117,7 @@ Transfer-Encoding: chunked
See also: See also:
{{{FileStatus Properties}<<<FileStatus>>> Properties}}, {{{FileStatus Properties}<<<FileStatus>>> Properties}},
{{{Status of a File/Directory}<<<GETFILESTATUS>>>}}, {{{Status of a File/Directory}<<<GETFILESTATUS>>>}},
{{{../../api/org/apache/hadoop/fs/FileStatus}FileStatus}} {{{../../api/org/apache/hadoop/fs/FileStatus.html}FileStatus}}
*** {FileStatus Properties} *** {FileStatus Properties}
@ -1232,7 +1230,7 @@ var fileStatusProperties =
See also: See also:
{{{FileStatus Properties}<<<FileStatus>>> Properties}}, {{{FileStatus Properties}<<<FileStatus>>> Properties}},
{{{List a Directory}<<<LISTSTATUS>>>}}, {{{List a Directory}<<<LISTSTATUS>>>}},
{{{../../api/org/apache/hadoop/fs/FileStatus}FileStatus}} {{{../../api/org/apache/hadoop/fs/FileStatus.html}FileStatus}}
** {Long JSON Schema} ** {Long JSON Schema}
@ -1275,7 +1273,7 @@ var fileStatusProperties =
See also: See also:
{{{Get Home Directory}<<<GETHOMEDIRECTORY>>>}}, {{{Get Home Directory}<<<GETHOMEDIRECTORY>>>}},
{{{../../api/org/apache/hadoop/fs/Path}Path}} {{{../../api/org/apache/hadoop/fs/Path.html}Path}}
** {RemoteException JSON Schema} ** {RemoteException JSON Schema}

View File

@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs;
import com.google.common.base.Charsets; import com.google.common.base.Charsets;
import com.google.common.base.Joiner; import com.google.common.base.Joiner;
import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
@ -27,7 +29,6 @@ import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileSystem.Statistics; import org.apache.hadoop.fs.FileSystem.Statistics;
import org.apache.hadoop.fs.Options.Rename; import org.apache.hadoop.fs.Options.Rename;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.MiniDFSCluster.NameNodeInfo; import org.apache.hadoop.hdfs.MiniDFSCluster.NameNodeInfo;
import org.apache.hadoop.hdfs.client.HdfsDataInputStream; import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
@ -889,21 +890,7 @@ public class DFSTestUtil {
/** Copy one file's contents into the other **/ /** Copy one file's contents into the other **/
public static void copyFile(File src, File dest) throws IOException { public static void copyFile(File src, File dest) throws IOException {
InputStream in = null; FileUtils.copyFile(src, dest);
OutputStream out = null;
try {
in = new FileInputStream(src);
out = new FileOutputStream(dest);
byte [] b = new byte[1024];
while( in.read(b) > 0 ) {
out.write(b);
}
} finally {
if(in != null) in.close();
if(out != null) out.close();
}
} }
public static class Builder { public static class Builder {

View File

@ -118,6 +118,20 @@ public class TestDFSUtil {
assertEquals(0, bs.length); assertEquals(0, bs.length);
} }
/**
 * Test constructing LocatedBlock with null cachedLocs: the resulting block
 * is expected to report an empty array of cached locations (the test
 * dereferences the returned array, so a null result would also fail it).
 */
@Test
public void testLocatedBlockConstructorWithNullCachedLocs() {
  // A single local datanode is enough to build a valid location list.
  DatanodeInfo[] ds = { DFSTestUtil.getLocalDatanodeInfo() };
  ExtendedBlock b1 = new ExtendedBlock("bpid", 1, 1, 1);
  // NOTE(review): presumably the final argument is cachedLocs - confirm
  // against the LocatedBlock constructor signature.
  LocatedBlock l1 = new LocatedBlock(b1, ds, null, null, 0, false, null);
  final DatanodeInfo[] cachedLocs = l1.getCachedLocations();
  // assertEquals reports the actual length on failure, which
  // assertTrue(cachedLocs.length == 0) would hide.
  assertEquals(0, cachedLocs.length);
}
private Configuration setupAddress(String key) { private Configuration setupAddress(String key) {
HdfsConfiguration conf = new HdfsConfiguration(); HdfsConfiguration conf = new HdfsConfiguration();

View File

@ -59,6 +59,7 @@
<item name="CLI Mini Cluster" href="hadoop-project-dist/hadoop-common/CLIMiniCluster.html"/> <item name="CLI Mini Cluster" href="hadoop-project-dist/hadoop-common/CLIMiniCluster.html"/>
<item name="Native Libraries" href="hadoop-project-dist/hadoop-common/NativeLibraries.html"/> <item name="Native Libraries" href="hadoop-project-dist/hadoop-common/NativeLibraries.html"/>
<item name="Superusers" href="hadoop-project-dist/hadoop-common/Superusers.html"/> <item name="Superusers" href="hadoop-project-dist/hadoop-common/Superusers.html"/>
<item name="Secure Mode" href="hadoop-project-dist/hadoop-common/SecureMode.html"/>
<item name="Service Level Authorization" href="hadoop-project-dist/hadoop-common/ServiceLevelAuth.html"/> <item name="Service Level Authorization" href="hadoop-project-dist/hadoop-common/ServiceLevelAuth.html"/>
<item name="HTTP Authentication" href="hadoop-project-dist/hadoop-common/HttpAuthentication.html"/> <item name="HTTP Authentication" href="hadoop-project-dist/hadoop-common/HttpAuthentication.html"/>
</menu> </menu>