Merge r1555021 through r1561943 from trunk.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-5535@1561944 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
efcdf81ca9
@ -24,8 +24,7 @@ Configuration
|
||||
|
||||
* Server Side Configuration Setup
|
||||
|
||||
The {{{./apidocs/org/apache/hadoop/auth/server/AuthenticationFilter.html}
|
||||
AuthenticationFilter filter}} is Hadoop Auth's server side component.
|
||||
The AuthenticationFilter filter is Hadoop Auth's server side component.
|
||||
|
||||
This filter must be configured in front of all the web application resources
|
||||
that require authenticated requests. For example:
|
||||
@ -46,9 +45,7 @@ Configuration
|
||||
must start with the prefix. The default value is no prefix.
|
||||
|
||||
* <<<[PREFIX.]type>>>: the authentication type keyword (<<<simple>>> or
|
||||
<<<kerberos>>>) or a
|
||||
{{{./apidocs/org/apache/hadoop/auth/server/AuthenticationHandler.html}
|
||||
Authentication handler implementation}}.
|
||||
<<<kerberos>>>) or an Authentication handler implementation.
|
||||
|
||||
* <<<[PREFIX.]signature.secret>>>: The secret to SHA-sign the generated
|
||||
authentication tokens. If a secret is not provided a random secret is
|
||||
|
@ -52,7 +52,3 @@ Hadoop Auth, Java HTTP SPNEGO ${project.version}
|
||||
|
||||
* {{{./BuildingIt.html}Building It}}
|
||||
|
||||
* {{{./apidocs/index.html}JavaDocs}}
|
||||
|
||||
* {{{./dependencies.html}Dependencies}}
|
||||
|
||||
|
@ -536,6 +536,15 @@ Release 2.4.0 - UNRELEASED
|
||||
HADOOP-10252. HttpServer can't start if hostname is not specified. (Jimmy
|
||||
Xiang via atm)
|
||||
|
||||
HADOOP-10203. Connection leak in
|
||||
Jets3tNativeFileSystemStore#retrieveMetadata. (Andrei Savu via atm)
|
||||
|
||||
HADOOP-10250. VersionUtil returns wrong value when comparing two versions.
|
||||
(Yongjun Zhang via atm)
|
||||
|
||||
HADOOP-10288. Explicit reference to Log4JLogger breaks non-log4j users
|
||||
(todd)
|
||||
|
||||
Release 2.3.0 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
@ -559,6 +568,12 @@ Release 2.3.0 - UNRELEASED
|
||||
HADOOP-10248. Property name should be included in the exception where property value
|
||||
is null (Akira AJISAKA via umamahesh)
|
||||
|
||||
HADOOP-10086. User document for authentication in secure cluster.
|
||||
(Masatake Iwasaki via Arpit Agarwal)
|
||||
|
||||
HADOOP-10274. Lower the logging level from ERROR to WARN for UGI.doAs method
|
||||
(Takeshi Miao via stack)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HADOOP-10142. Avoid groups lookup for unprivileged users such as "dr.who"
|
||||
@ -637,6 +652,12 @@ Release 2.3.0 - UNRELEASED
|
||||
HADOOP-10167. Mark hadoop-common source as UTF-8 in Maven pom files / refactoring
|
||||
(Mikhail Antonov via cos)
|
||||
|
||||
HADOOP-9982. Fix dead links in hadoop site docs. (Akira Ajisaka via Arpit
|
||||
Agarwal)
|
||||
|
||||
HADOOP-10212. Incorrect compile command in Native Library document.
|
||||
(Akira Ajisaka via Arpit Agarwal)
|
||||
|
||||
Release 2.2.0 - 2013-10-13
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -364,4 +364,11 @@
|
||||
<Bug pattern="OBL_UNSATISFIED_OBLIGATION"/>
|
||||
</Match>
|
||||
|
||||
<!-- code from maven source, null value is checked at callee side. -->
|
||||
<Match>
|
||||
<Class name="org.apache.hadoop.util.ComparableVersion$ListItem" />
|
||||
<Method name="compareTo" />
|
||||
<Bug code="NP" />
|
||||
</Match>
|
||||
|
||||
</FindBugsFilter>
|
||||
|
@ -110,23 +110,29 @@ class Jets3tNativeFileSystemStore implements NativeFileSystemStore {
|
||||
handleS3ServiceException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public FileMetadata retrieveMetadata(String key) throws IOException {
|
||||
StorageObject object = null;
|
||||
try {
|
||||
if(LOG.isDebugEnabled()) {
|
||||
LOG.debug("Getting metadata for key: " + key + " from bucket:" + bucket.getName());
|
||||
}
|
||||
S3Object object = s3Service.getObject(bucket.getName(), key);
|
||||
object = s3Service.getObjectDetails(bucket.getName(), key);
|
||||
return new FileMetadata(key, object.getContentLength(),
|
||||
object.getLastModifiedDate().getTime());
|
||||
} catch (S3ServiceException e) {
|
||||
|
||||
} catch (ServiceException e) {
|
||||
// Following is brittle. Is there a better way?
|
||||
if (e.getS3ErrorCode().matches("NoSuchKey")) {
|
||||
if ("NoSuchKey".equals(e.getErrorCode())) {
|
||||
return null; //return null if key not found
|
||||
}
|
||||
handleS3ServiceException(e);
|
||||
handleServiceException(e);
|
||||
return null; //never returned - keep compiler happy
|
||||
} finally {
|
||||
if (object != null) {
|
||||
object.closeDataInputStream();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -53,7 +53,17 @@ public class HttpRequestLog {
|
||||
String appenderName = name + "requestlog";
|
||||
Log logger = LogFactory.getLog(loggerName);
|
||||
|
||||
if (logger instanceof Log4JLogger) {
|
||||
boolean isLog4JLogger;;
|
||||
try {
|
||||
isLog4JLogger = logger instanceof Log4JLogger;
|
||||
} catch (NoClassDefFoundError err) {
|
||||
// In some dependent projects, log4j may not even be on the classpath at
|
||||
// runtime, in which case the above instanceof check will throw
|
||||
// NoClassDefFoundError.
|
||||
LOG.debug("Could not load Log4JLogger class", err);
|
||||
isLog4JLogger = false;
|
||||
}
|
||||
if (isLog4JLogger) {
|
||||
Log4JLogger httpLog4JLog = (Log4JLogger)logger;
|
||||
Logger httpLogger = httpLog4JLog.getLogger();
|
||||
Appender appender = null;
|
||||
|
@ -1560,7 +1560,7 @@ public class UserGroupInformation {
|
||||
return Subject.doAs(subject, action);
|
||||
} catch (PrivilegedActionException pae) {
|
||||
Throwable cause = pae.getCause();
|
||||
LOG.error("PriviledgedActionException as:"+this+" cause:"+cause);
|
||||
LOG.warn("PriviledgedActionException as:"+this+" cause:"+cause);
|
||||
if (cause instanceof IOException) {
|
||||
throw (IOException) cause;
|
||||
} else if (cause instanceof Error) {
|
||||
|
@ -0,0 +1,479 @@
|
||||
// Code source of this file:
|
||||
// http://grepcode.com/file/repo1.maven.org/maven2/
|
||||
// org.apache.maven/maven-artifact/3.1.1/
|
||||
// org/apache/maven/artifact/versioning/ComparableVersion.java/
|
||||
//
|
||||
// Modifications made on top of the source:
|
||||
// 1. Changed
|
||||
// package org.apache.maven.artifact.versioning;
|
||||
// to
|
||||
// package org.apache.hadoop.util;
|
||||
// 2. Removed author tags to clear hadoop author tag warning
|
||||
// author <a href="mailto:kenney@apache.org">Kenney Westerhof</a>
|
||||
// author <a href="mailto:hboutemy@apache.org">Hervé Boutemy</a>
|
||||
//
|
||||
package org.apache.hadoop.util;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
import java.math.BigInteger;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.ListIterator;
|
||||
import java.util.Locale;
|
||||
import java.util.Properties;
|
||||
import java.util.Stack;
|
||||
|
||||
/**
|
||||
* Generic implementation of version comparison.
|
||||
*
|
||||
* <p>Features:
|
||||
* <ul>
|
||||
* <li>mixing of '<code>-</code>' (dash) and '<code>.</code>' (dot) separators,</li>
|
||||
* <li>transition between characters and digits also constitutes a separator:
|
||||
* <code>1.0alpha1 => [1, 0, alpha, 1]</code></li>
|
||||
* <li>unlimited number of version components,</li>
|
||||
* <li>version components in the text can be digits or strings,</li>
|
||||
* <li>strings are checked for well-known qualifiers and the qualifier ordering is used for version ordering.
|
||||
* Well-known qualifiers (case insensitive) are:<ul>
|
||||
* <li><code>alpha</code> or <code>a</code></li>
|
||||
* <li><code>beta</code> or <code>b</code></li>
|
||||
* <li><code>milestone</code> or <code>m</code></li>
|
||||
* <li><code>rc</code> or <code>cr</code></li>
|
||||
* <li><code>snapshot</code></li>
|
||||
* <li><code>(the empty string)</code> or <code>ga</code> or <code>final</code></li>
|
||||
* <li><code>sp</code></li>
|
||||
* </ul>
|
||||
* Unknown qualifiers are considered after known qualifiers, with lexical order (always case insensitive),
|
||||
* </li>
|
||||
* <li>a dash usually precedes a qualifier, and is always less important than something preceded with a dot.</li>
|
||||
* </ul></p>
|
||||
*
|
||||
* @see <a href="https://cwiki.apache.org/confluence/display/MAVENOLD/Versioning">"Versioning" on Maven Wiki</a>
|
||||
*/
|
||||
public class ComparableVersion
    implements Comparable<ComparableVersion> {

  /** The version string exactly as it was handed to {@link #parseVersion(String)}. */
  private String value;

  /** Text form of the normalized item tree; backs {@link #equals(Object)} and {@link #hashCode()}. */
  private String canonical;

  /** Root of the parsed and normalized item tree. */
  private ListItem items;

  /**
   * One component of a parsed version: a number, a qualifier string, or a
   * nested list of further components.
   */
  private interface Item {
    int INTEGER_ITEM = 0;
    int STRING_ITEM = 1;
    int LIST_ITEM = 2;

    /**
     * Compares this item with another one.
     *
     * @param item the other item; {@code null} stands for "no component at
     *             this position" in the other version
     * @return negative, zero or positive, as for {@link Comparable#compareTo}
     */
    int compareTo(Item item);

    /** @return one of {@link #INTEGER_ITEM}, {@link #STRING_ITEM}, {@link #LIST_ITEM} */
    int getType();

    /** @return true if this item is dropped by {@link ListItem#normalize()} when trailing */
    boolean isNull();
  }

  /**
   * Represents a numeric item in the version item list.
   */
  private static class IntegerItem implements Item {

    private final BigInteger value;

    // Uses BigInteger.ZERO directly so this constant does not depend on the
    // declaration order of another static field in this class.
    public static final IntegerItem ZERO = new IntegerItem();

    private IntegerItem() {
      this.value = BigInteger.ZERO;
    }

    public IntegerItem(String str) {
      this.value = new BigInteger(str);
    }

    @Override
    public int getType() {
      return INTEGER_ITEM;
    }

    @Override
    public boolean isNull() {
      return BigInteger.ZERO.equals(value);
    }

    @Override
    public int compareTo(Item item) {
      if (item == null) {
        return BigInteger.ZERO.equals(value) ? 0 : 1; // 1.0 == 1, 1.1 > 1
      }

      switch (item.getType()) {
        case INTEGER_ITEM:
          return value.compareTo(((IntegerItem) item).value);

        case STRING_ITEM:
          return 1; // 1.1 > 1-sp

        case LIST_ITEM:
          return 1; // 1.1 > 1-1

        default:
          throw new RuntimeException("invalid item: " + item.getClass());
      }
    }

    @Override
    public String toString() {
      return value.toString();
    }
  }

  /**
   * Represents a string in the version item list, usually a qualifier.
   */
  private static class StringItem implements Item {

    /** Known qualifiers in ascending order; the empty string is the plain release. */
    private static final String[] QUALIFIERS =
        { "alpha", "beta", "milestone", "rc", "snapshot", "", "sp" };

    private static final List<String> _QUALIFIERS = Arrays.asList(QUALIFIERS);

    /** Alternative spellings mapped onto entries of {@link #QUALIFIERS}. */
    private static final Properties ALIASES = new Properties();
    static {
      ALIASES.put("ga", "");
      ALIASES.put("final", "");
      ALIASES.put("cr", "rc");
    }

    /**
     * A comparable value for the empty-string qualifier. This one is used to
     * determine if a given qualifier makes the version older than one without
     * a qualifier, or more recent.
     */
    private static final String RELEASE_VERSION_INDEX =
        String.valueOf(_QUALIFIERS.indexOf(""));

    private final String value;

    /**
     * @param value           the qualifier text (already lower-cased by the parser)
     * @param followedByDigit true when a digit directly follows the text, in
     *                        which case the one-letter forms a/b/m are expanded
     */
    public StringItem(String value, boolean followedByDigit) {
      if (followedByDigit && value.length() == 1) {
        // a1 = alpha-1, b1 = beta-1, m1 = milestone-1
        switch (value.charAt(0)) {
          case 'a':
            value = "alpha";
            break;
          case 'b':
            value = "beta";
            break;
          case 'm':
            value = "milestone";
            break;
          default:
            break;
        }
      }
      this.value = ALIASES.getProperty(value, value);
    }

    @Override
    public int getType() {
      return STRING_ITEM;
    }

    @Override
    public boolean isNull() {
      return comparableQualifier(value).compareTo(RELEASE_VERSION_INDEX) == 0;
    }

    /**
     * Returns a comparable value for a qualifier.
     *
     * Known qualifiers map to their index in {@link #QUALIFIERS}; unknown
     * ones map to {@code "<number of known qualifiers>-<qualifier>"}, so they
     * sort after every known qualifier and lexically among themselves.
     *
     * @param qualifier the qualifier text to translate
     * @return an equivalent value that can be used with lexical comparison
     */
    public static String comparableQualifier(String qualifier) {
      int i = _QUALIFIERS.indexOf(qualifier);

      return i == -1 ? (_QUALIFIERS.size() + "-" + qualifier) : String.valueOf(i);
    }

    @Override
    public int compareTo(Item item) {
      if (item == null) {
        // 1-rc < 1, 1-ga > 1
        return comparableQualifier(value).compareTo(RELEASE_VERSION_INDEX);
      }

      switch (item.getType()) {
        case INTEGER_ITEM:
          return -1; // 1.any < 1.1 ?

        case STRING_ITEM:
          return comparableQualifier(value)
              .compareTo(comparableQualifier(((StringItem) item).value));

        case LIST_ITEM:
          return -1; // 1.any < 1-1

        default:
          throw new RuntimeException("invalid item: " + item.getClass());
      }
    }

    @Override
    public String toString() {
      return value;
    }
  }

  /**
   * Represents a version list item. This class is used both for the global
   * item list and for sub-lists (which start with '-(number)' in the version
   * specification).
   */
  private static class ListItem
      extends ArrayList<Item>
      implements Item {

    private static final long serialVersionUID = 1L;

    @Override
    public int getType() {
      return LIST_ITEM;
    }

    @Override
    public boolean isNull() {
      return isEmpty();
    }

    /** Strips trailing items whose {@link Item#isNull()} is true: 0, "", empty list. */
    void normalize() {
      ListIterator<Item> iterator = listIterator(size());
      while (iterator.hasPrevious()) {
        if (!iterator.previous().isNull()) {
          break;
        }
        iterator.remove();
      }
    }

    @Override
    public int compareTo(Item item) {
      if (item == null) {
        if (isEmpty()) {
          return 0; // 1-0 = 1- (normalize) = 1
        }
        return get(0).compareTo(null);
      }

      switch (item.getType()) {
        case INTEGER_ITEM:
          return -1; // 1-1 < 1.0.x

        case STRING_ITEM:
          return 1; // 1-1 > 1-sp

        case LIST_ITEM:
          Iterator<Item> left = iterator();
          Iterator<Item> right = ((ListItem) item).iterator();

          while (left.hasNext() || right.hasNext()) {
            Item l = left.hasNext() ? left.next() : null;
            Item r = right.hasNext() ? right.next() : null;

            // The loop condition guarantees at least one side is non-null.
            // If this list is the shorter one, compare from the other side
            // and flip the sign.
            int result = (l == null) ? -1 * r.compareTo(l) : l.compareTo(r);

            if (result != 0) {
              return result;
            }
          }

          return 0;

        default:
          throw new RuntimeException("invalid item: " + item.getClass());
      }
    }

    @Override
    public String toString() {
      StringBuilder buffer = new StringBuilder("(");
      for (Iterator<Item> iter = iterator(); iter.hasNext();) {
        buffer.append(iter.next());
        if (iter.hasNext()) {
          buffer.append(',');
        }
      }
      buffer.append(')');
      return buffer.toString();
    }
  }

  /**
   * Creates a comparable version by parsing the given string.
   *
   * @param version the version string; must not be {@code null}
   */
  public ComparableVersion(String version) {
    parseVersion(version);
  }

  /**
   * Parses {@code version} and replaces the state of this object with the
   * result. The string is lower-cased with {@link Locale#ENGLISH}; '.' and '-'
   * act as separators, and a switch between digits and non-digits also starts
   * a new item.
   *
   * The parse is done on local variables and the fields are assigned only at
   * the end, so an exception (for example a {@code NullPointerException} for
   * a {@code null} argument) leaves a previously parsed state untouched.
   *
   * @param version the version string; must not be {@code null}
   */
  public final void parseVersion(String version) {
    String lowered = version.toLowerCase(Locale.ENGLISH);
    int length = lowered.length();

    ListItem root = new ListItem();
    ListItem list = root;

    // Every list that was opened, so that all of them get normalized at the end.
    Stack<ListItem> stack = new Stack<ListItem>();
    stack.push(list);

    boolean isDigit = false;
    int startIndex = 0;

    for (int i = 0; i < length; i++) {
      char c = lowered.charAt(i);

      if (c == '.' || c == '-') {
        // Flush the pending segment; an empty segment counts as 0.
        if (i == startIndex) {
          list.add(IntegerItem.ZERO);
        } else {
          list.add(parseItem(isDigit, lowered.substring(startIndex, i)));
        }
        startIndex = i + 1;

        if (c == '-' && isDigit) {
          list.normalize(); // 1.0-* = 1-*

          if ((i + 1 < length) && Character.isDigit(lowered.charAt(i + 1))) {
            // new ListItem only if previous were digits and new char is a digit,
            // ie need to differentiate only 1.1 from 1-1
            ListItem sub = new ListItem();
            list.add(sub);
            list = sub;
            stack.push(list);
          }
        }
      } else if (Character.isDigit(c)) {
        if (!isDigit && i > startIndex) {
          // letters directly followed by a digit: "alpha1" -> alpha, 1
          list.add(new StringItem(lowered.substring(startIndex, i), true));
          startIndex = i;
        }
        isDigit = true;
      } else {
        if (isDigit && i > startIndex) {
          // digits directly followed by a letter: "1alpha" -> 1, alpha
          list.add(parseItem(true, lowered.substring(startIndex, i)));
          startIndex = i;
        }
        isDigit = false;
      }
    }

    if (length > startIndex) {
      list.add(parseItem(isDigit, lowered.substring(startIndex)));
    }

    while (!stack.isEmpty()) {
      stack.pop().normalize();
    }

    this.value = version;
    this.items = root;
    this.canonical = root.toString();
  }

  /** Builds an {@link IntegerItem} or a {@link StringItem} from one raw segment. */
  private static Item parseItem(boolean isDigit, String buf) {
    return isDigit ? new IntegerItem(buf) : new StringItem(buf, false);
  }

  /**
   * Compares the parsed item trees of the two versions.
   *
   * @param o the version to compare with; must not be {@code null}
   * @return negative, zero or positive if this version is lower than, equal
   *         to, or higher than {@code o}
   */
  @Override
  public int compareTo(ComparableVersion o) {
    return items.compareTo(o.items);
  }

  /** @return the version string exactly as it was passed in */
  @Override
  public String toString() {
    return value;
  }

  /** Two versions are equal when their canonical (normalized) forms are equal. */
  @Override
  public boolean equals(Object o) {
    return (o instanceof ComparableVersion)
        && canonical.equals(((ComparableVersion) o).canonical);
  }

  @Override
  public int hashCode() {
    return canonical.hashCode();
  }
}
|
@ -17,55 +17,17 @@
|
||||
*/
|
||||
package org.apache.hadoop.util;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
|
||||
import com.google.common.collect.ComparisonChain;
|
||||
|
||||
/**
|
||||
* A wrapper class to maven's ComparableVersion class, to comply
|
||||
* with maven's version name string convention
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public abstract class VersionUtil {
|
||||
|
||||
private static final Pattern COMPONENT_GROUPS = Pattern.compile("(\\d+)|(\\D+)");
|
||||
|
||||
/**
|
||||
* Suffix added by maven for nightly builds and other snapshot releases.
|
||||
* These releases are considered to precede the non-SNAPSHOT version
|
||||
* with the same version number.
|
||||
*/
|
||||
private static final String SNAPSHOT_SUFFIX = "-SNAPSHOT";
|
||||
|
||||
/**
|
||||
* This function splits the two versions on "." and performs a
|
||||
* naturally-ordered comparison of the resulting components. For example, the
|
||||
* version string "0.3" is considered to precede "0.20", despite the fact that
|
||||
* lexical comparison would consider "0.20" to precede "0.3". This method of
|
||||
* comparison is similar to the method used by package versioning systems like
|
||||
* deb and RPM.
|
||||
*
|
||||
* Version components are compared numerically whenever possible, however a
|
||||
* version component can contain non-numeric characters. When a non-numeric
|
||||
* group of characters is found in a version component, this group is compared
|
||||
* with the similarly-indexed group in the other version component. If the
|
||||
* other group is numeric, then the numeric group is considered to precede the
|
||||
* non-numeric group. If both groups are non-numeric, then a lexical
|
||||
* comparison is performed.
|
||||
*
|
||||
* If two versions have a different number of components, then only the lower
|
||||
* number of components are compared. If those components are identical
|
||||
* between the two versions, then the version with fewer components is
|
||||
* considered to precede the version with more components.
|
||||
*
|
||||
* In addition to the above rules, there is one special case: maven SNAPSHOT
|
||||
* releases are considered to precede a non-SNAPSHOT release with an
|
||||
* otherwise identical version number. For example, 2.0-SNAPSHOT precedes
|
||||
* 2.0.
|
||||
*
|
||||
* This function returns a negative integer if version1 precedes version2, a
|
||||
* positive integer if version2 precedes version1, and 0 if and only if the
|
||||
* two versions' components are identical in value and cardinality.
|
||||
*
|
||||
* Compares two version name strings using maven's ComparableVersion class.
|
||||
*
|
||||
* @param version1
|
||||
* the first version to compare
|
||||
* @param version2
|
||||
@ -75,58 +37,8 @@ public abstract class VersionUtil {
|
||||
* versions are equal.
|
||||
*/
|
||||
public static int compareVersions(String version1, String version2) {
|
||||
boolean isSnapshot1 = version1.endsWith(SNAPSHOT_SUFFIX);
|
||||
boolean isSnapshot2 = version2.endsWith(SNAPSHOT_SUFFIX);
|
||||
version1 = stripSnapshotSuffix(version1);
|
||||
version2 = stripSnapshotSuffix(version2);
|
||||
|
||||
String[] version1Parts = version1.split("\\.");
|
||||
String[] version2Parts = version2.split("\\.");
|
||||
|
||||
for (int i = 0; i < version1Parts.length && i < version2Parts.length; i++) {
|
||||
String component1 = version1Parts[i];
|
||||
String component2 = version2Parts[i];
|
||||
if (!component1.equals(component2)) {
|
||||
Matcher matcher1 = COMPONENT_GROUPS.matcher(component1);
|
||||
Matcher matcher2 = COMPONENT_GROUPS.matcher(component2);
|
||||
|
||||
while (matcher1.find() && matcher2.find()) {
|
||||
String group1 = matcher1.group();
|
||||
String group2 = matcher2.group();
|
||||
if (!group1.equals(group2)) {
|
||||
if (isNumeric(group1) && isNumeric(group2)) {
|
||||
return Integer.parseInt(group1) - Integer.parseInt(group2);
|
||||
} else if (!isNumeric(group1) && !isNumeric(group2)) {
|
||||
return group1.compareTo(group2);
|
||||
} else {
|
||||
return isNumeric(group1) ? -1 : 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return component1.length() - component2.length();
|
||||
}
|
||||
}
|
||||
|
||||
return ComparisonChain.start()
|
||||
.compare(version1Parts.length, version2Parts.length)
|
||||
.compare(isSnapshot2, isSnapshot1)
|
||||
.result();
|
||||
}
|
||||
|
||||
private static String stripSnapshotSuffix(String version) {
|
||||
if (version.endsWith(SNAPSHOT_SUFFIX)) {
|
||||
return version.substring(0, version.length() - SNAPSHOT_SUFFIX.length());
|
||||
} else {
|
||||
return version;
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean isNumeric(String s) {
|
||||
try {
|
||||
Integer.parseInt(s);
|
||||
return true;
|
||||
} catch (NumberFormatException nfe) {
|
||||
return false;
|
||||
}
|
||||
ComparableVersion v1 = new ComparableVersion(version1);
|
||||
ComparableVersion v2 = new ComparableVersion(version2);
|
||||
return v1.compareTo(v2);
|
||||
}
|
||||
}
|
||||
|
@ -18,8 +18,6 @@
|
||||
|
||||
Hadoop MapReduce Next Generation - CLI MiniCluster.
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* {Purpose}
|
||||
@ -42,7 +40,8 @@ Hadoop MapReduce Next Generation - CLI MiniCluster.
|
||||
$ mvn clean install -DskipTests
|
||||
$ mvn package -Pdist -Dtar -DskipTests -Dmaven.javadoc.skip
|
||||
+---+
|
||||
<<NOTE:>> You will need protoc 2.5.0 installed.
|
||||
<<NOTE:>> You will need {{{http://code.google.com/p/protobuf/}protoc 2.5.0}}
|
||||
installed.
|
||||
|
||||
The tarball should be available in <<<hadoop-dist/target/>>> directory.
|
||||
|
||||
|
@ -16,8 +16,6 @@
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
\[ {{{../index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
Hadoop MapReduce Next Generation - Cluster Setup
|
||||
@ -29,7 +27,7 @@ Hadoop MapReduce Next Generation - Cluster Setup
|
||||
with thousands of nodes.
|
||||
|
||||
To play with Hadoop, you may first want to install it on a single
|
||||
machine (see {{{SingleCluster}Single Node Setup}}).
|
||||
machine (see {{{./SingleCluster.html}Single Node Setup}}).
|
||||
|
||||
* {Prerequisites}
|
||||
|
||||
@ -571,440 +569,6 @@ $ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh stop proxyserver --config $HADOOP_CONF_D
|
||||
$ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh stop historyserver --config $HADOOP_CONF_DIR
|
||||
----
|
||||
|
||||
* {Running Hadoop in Secure Mode}
|
||||
|
||||
This section deals with important parameters that must be specified
|
||||
to run Hadoop in <<secure mode>> with strong, Kerberos-based
|
||||
authentication.
|
||||
|
||||
* <<<User Accounts for Hadoop Daemons>>>
|
||||
|
||||
Ensure that HDFS and YARN daemons run as different Unix users, for e.g.
|
||||
<<<hdfs>>> and <<<yarn>>>. Also, ensure that the MapReduce JobHistory
|
||||
server runs as user <<<mapred>>>.
|
||||
|
||||
It's recommended to have them share a Unix group, for e.g. <<<hadoop>>>.
|
||||
|
||||
*---------------+----------------------------------------------------------------------+
|
||||
|| User:Group || Daemons |
|
||||
*---------------+----------------------------------------------------------------------+
|
||||
| hdfs:hadoop | NameNode, Secondary NameNode, Checkpoint Node, Backup Node, DataNode |
|
||||
*---------------+----------------------------------------------------------------------+
|
||||
| yarn:hadoop | ResourceManager, NodeManager |
|
||||
*---------------+----------------------------------------------------------------------+
|
||||
| mapred:hadoop | MapReduce JobHistory Server |
|
||||
*---------------+----------------------------------------------------------------------+
|
||||
|
||||
* <<<Permissions for both HDFS and local fileSystem paths>>>
|
||||
|
||||
The following table lists various paths on HDFS and local filesystems (on
|
||||
all nodes) and recommended permissions:
|
||||
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
|| Filesystem || Path || User:Group || Permissions |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<dfs.namenode.name.dir>>> | hdfs:hadoop | drwx------ |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<dfs.datanode.data.dir>>> | hdfs:hadoop | drwx------ |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | $HADOOP_LOG_DIR | hdfs:hadoop | drwxrwxr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | $YARN_LOG_DIR | yarn:hadoop | drwxrwxr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<yarn.nodemanager.local-dirs>>> | yarn:hadoop | drwxr-xr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<yarn.nodemanager.log-dirs>>> | yarn:hadoop | drwxr-xr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | container-executor | root:hadoop | --Sr-s--- |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<conf/container-executor.cfg>>> | root:hadoop | r-------- |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| hdfs | / | hdfs:hadoop | drwxr-xr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| hdfs | /tmp | hdfs:hadoop | drwxrwxrwxt |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| hdfs | /user | hdfs:hadoop | drwxr-xr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| hdfs | <<<yarn.nodemanager.remote-app-log-dir>>> | yarn:hadoop | drwxrwxrwxt |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| hdfs | <<<mapreduce.jobhistory.intermediate-done-dir>>> | mapred:hadoop | |
|
||||
| | | | drwxrwxrwxt |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| hdfs | <<<mapreduce.jobhistory.done-dir>>> | mapred:hadoop | |
|
||||
| | | | drwxr-x--- |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
|
||||
* Kerberos Keytab files
|
||||
|
||||
* HDFS
|
||||
|
||||
The NameNode keytab file, on the NameNode host, should look like the
|
||||
following:
|
||||
|
||||
----
|
||||
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/nn.service.keytab
|
||||
Keytab name: FILE:/etc/security/keytab/nn.service.keytab
|
||||
KVNO Timestamp Principal
|
||||
4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
----
|
||||
|
||||
The Secondary NameNode keytab file, on that host, should look like the
|
||||
following:
|
||||
|
||||
----
|
||||
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/sn.service.keytab
|
||||
Keytab name: FILE:/etc/security/keytab/sn.service.keytab
|
||||
KVNO Timestamp Principal
|
||||
4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
----
|
||||
|
||||
The DataNode keytab file, on each host, should look like the following:
|
||||
|
||||
----
|
||||
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/dn.service.keytab
|
||||
Keytab name: FILE:/etc/security/keytab/dn.service.keytab
|
||||
KVNO Timestamp Principal
|
||||
4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
----
|
||||
|
||||
* YARN
|
||||
|
||||
The ResourceManager keytab file, on the ResourceManager host, should look
|
||||
like the following:
|
||||
|
||||
----
|
||||
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/rm.service.keytab
|
||||
Keytab name: FILE:/etc/security/keytab/rm.service.keytab
|
||||
KVNO Timestamp Principal
|
||||
4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
----
|
||||
|
||||
The NodeManager keytab file, on each host, should look like the following:
|
||||
|
||||
----
|
||||
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/nm.service.keytab
|
||||
Keytab name: FILE:/etc/security/keytab/nm.service.keytab
|
||||
KVNO Timestamp Principal
|
||||
4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
----
|
||||
|
||||
* MapReduce JobHistory Server
|
||||
|
||||
The MapReduce JobHistory Server keytab file, on that host, should look
|
||||
like the following:
|
||||
|
||||
----
|
||||
$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/jhs.service.keytab
|
||||
Keytab name: FILE:/etc/security/keytab/jhs.service.keytab
|
||||
KVNO Timestamp Principal
|
||||
4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
----
|
||||
|
||||
** Configuration in Secure Mode
|
||||
|
||||
* <<<conf/core-site.xml>>>
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<hadoop.security.authentication>>> | <kerberos> | <simple> is non-secure. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<hadoop.security.authorization>>> | <true> | |
|
||||
| | | Enable RPC service-level authorization. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|
||||
* <<<conf/hdfs-site.xml>>>
|
||||
|
||||
* Configurations for NameNode:
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.block.access.token.enable>>> | <true> | |
|
||||
| | | Enable HDFS block access tokens for secure operations. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.https.enable>>> | <true> | |
|
||||
| | | This value is deprecated. Use dfs.http.policy |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.http.policy>>> | <HTTP_ONLY> or <HTTPS_ONLY> or <HTTP_AND_HTTPS> | |
|
||||
| | | HTTPS_ONLY turns off http access |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.https-address>>> | <nn_host_fqdn:50470> | |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.https.port>>> | <50470> | |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.keytab.file>>> | </etc/security/keytab/nn.service.keytab> | |
|
||||
| | | Kerberos keytab file for the NameNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.kerberos.principal>>> | nn/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the NameNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.kerberos.https.principal>>> | host/_HOST@REALM.TLD | |
|
||||
| | | HTTPS Kerberos principal name for the NameNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|
||||
* Configurations for Secondary NameNode:
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.secondary.http-address>>> | <c_nn_host_fqdn:50090> | |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.secondary.https-port>>> | <50470> | |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.secondary.keytab.file>>> | | |
|
||||
| | </etc/security/keytab/sn.service.keytab> | |
|
||||
| | | Kerberos keytab file for the Secondary NameNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.secondary.kerberos.principal>>> | sn/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the Secondary NameNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.secondary.kerberos.https.principal>>> | | |
|
||||
| | host/_HOST@REALM.TLD | |
|
||||
| | | HTTPS Kerberos principal name for the Secondary NameNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|
||||
* Configurations for DataNode:
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.datanode.data.dir.perm>>> | 700 | |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.datanode.address>>> | <0.0.0.0:2003> | |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.datanode.https.address>>> | <0.0.0.0:2005> | |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.datanode.keytab.file>>> | </etc/security/keytab/dn.service.keytab> | |
|
||||
| | | Kerberos keytab file for the DataNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.datanode.kerberos.principal>>> | dn/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the DataNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.datanode.kerberos.https.principal>>> | | |
|
||||
| | host/_HOST@REALM.TLD | |
|
||||
| | | HTTPS Kerberos principal name for the DataNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|
||||
* <<<conf/yarn-site.xml>>>
|
||||
|
||||
* WebAppProxy
|
||||
|
||||
The <<<WebAppProxy>>> provides a proxy between the web applications
|
||||
exported by an application and an end user. If security is enabled
|
||||
it will warn users before accessing a potentially unsafe web application.
|
||||
Authentication and authorization using the proxy is handled just like
|
||||
any other privileged web application.
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.web-proxy.address>>> | | |
|
||||
| | <<<WebAppProxy>>> host:port for proxy to AM web apps. | |
|
||||
| | | <host:port> if this is the same as <<<yarn.resourcemanager.webapp.address>>>|
|
||||
| | | or it is not defined then the <<<ResourceManager>>> will run the proxy|
|
||||
| | | otherwise a standalone proxy server will need to be launched.|
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.web-proxy.keytab>>> | | |
|
||||
| | </etc/security/keytab/web-app.service.keytab> | |
|
||||
| | | Kerberos keytab file for the WebAppProxy. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.web-proxy.principal>>> | wap/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the WebAppProxy. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|
||||
* LinuxContainerExecutor
|
||||
|
||||
A <<<ContainerExecutor>>> is used by the YARN framework to define how any
|
||||
<container> is launched and controlled.
|
||||
|
||||
The following executors are available in Hadoop YARN:
|
||||
|
||||
*--------------------------------------+--------------------------------------+
|
||||
|| ContainerExecutor || Description |
|
||||
*--------------------------------------+--------------------------------------+
|
||||
| <<<DefaultContainerExecutor>>> | |
|
||||
| | The default executor which YARN uses to manage container execution. |
|
||||
| | The container process has the same Unix user as the NodeManager. |
|
||||
*--------------------------------------+--------------------------------------+
|
||||
| <<<LinuxContainerExecutor>>> | |
|
||||
| | Supported only on GNU/Linux, this executor runs the containers as either the |
|
||||
| | YARN user who submitted the application (when full security is enabled) or |
|
||||
| | as a dedicated user (defaults to nobody) when full security is not enabled. |
|
||||
| | When full security is enabled, this executor requires all user accounts to be |
|
||||
| | created on the cluster nodes where the containers are launched. It uses |
|
||||
| | a <setuid> executable that is included in the Hadoop distribution. |
|
||||
| | The NodeManager uses this executable to launch and kill containers. |
|
||||
| | The setuid executable switches to the user who has submitted the |
|
||||
| | application and launches or kills the containers. For maximum security, |
|
||||
| | this executor sets up restricted permissions and user/group ownership of |
|
||||
| | local files and directories used by the containers such as the shared |
|
||||
| | objects, jars, intermediate files, log files etc. Particularly note that, |
|
||||
| | because of this, except the application owner and NodeManager, no other |
|
||||
| | user can access any of the local files/directories including those |
|
||||
| | localized as part of the distributed cache. |
|
||||
*--------------------------------------+--------------------------------------+
|
||||
|
||||
To build the LinuxContainerExecutor executable run:
|
||||
|
||||
----
|
||||
$ mvn package -Dcontainer-executor.conf.dir=/etc/hadoop/
|
||||
----
|
||||
|
||||
The path passed in <<<-Dcontainer-executor.conf.dir>>> should be the
|
||||
path on the cluster nodes where a configuration file for the setuid
|
||||
executable should be located. The executable should be installed in
|
||||
$HADOOP_YARN_HOME/bin.
|
||||
|
||||
The executable must have specific permissions: 6050 or --Sr-s---
|
||||
permissions user-owned by <root> (super-user) and group-owned by a
|
||||
special group (e.g. <<<hadoop>>>) of which the NodeManager Unix user is
|
||||
the group member and no ordinary application user is. If any application
|
||||
user belongs to this special group, security will be compromised. This
|
||||
special group name should be specified for the configuration property
|
||||
<<<yarn.nodemanager.linux-container-executor.group>>> in both
|
||||
<<<conf/yarn-site.xml>>> and <<<conf/container-executor.cfg>>>.
|
||||
|
||||
For example, let's say that the NodeManager is run as user <yarn> who is
|
||||
part of the groups users and <hadoop>, any of them being the primary group.
|
||||
Suppose also that <users> has both <yarn> and another user
|
||||
(application submitter) <alice> as its members, and <alice> does not
|
||||
belong to <hadoop>. Going by the above description, the setuid/setgid
|
||||
executable should be set 6050 or --Sr-s--- with user-owner as <yarn> and
|
||||
group-owner as <hadoop> which has <yarn> as its member (and not <users>
|
||||
which has <alice> also as its member besides <yarn>).
|
||||
|
||||
The LinuxTaskController requires that paths including and leading up to
|
||||
the directories specified in <<<yarn.nodemanager.local-dirs>>> and
|
||||
<<<yarn.nodemanager.log-dirs>>> be set to 755 permissions as described
|
||||
above in the table on permissions on directories.
|
||||
|
||||
* <<<conf/container-executor.cfg>>>
|
||||
|
||||
The executable requires a configuration file called
|
||||
<<<container-executor.cfg>>> to be present in the configuration
|
||||
directory passed to the mvn target mentioned above.
|
||||
|
||||
The configuration file must be owned by the user running NodeManager
|
||||
(user <<<yarn>>> in the above example), group-owned by anyone and
|
||||
should have the permissions 0400 or r--------.
|
||||
|
||||
The executable requires the following configuration items to be present
|
||||
in the <<<conf/container-executor.cfg>>> file. The items should be
|
||||
mentioned as simple key=value pairs, one per-line:
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.nodemanager.linux-container-executor.group>>> | <hadoop> | |
|
||||
| | | Unix group of the NodeManager. The group owner of the |
|
||||
| | |<container-executor> binary should be this group. Should be same as the |
|
||||
| | | value with which the NodeManager is configured. This configuration is |
|
||||
| | | required for validating the secure access of the <container-executor> |
|
||||
| | | binary. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<banned.users>>> | hdfs,yarn,mapred,bin | Banned users. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<allowed.system.users>>> | foo,bar | Allowed system users. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<min.user.id>>> | 1000 | Prevent other super-users. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|
||||
To re-cap, here are the local file-system permissions required for the
|
||||
various paths related to the <<<LinuxContainerExecutor>>>:
|
||||
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
|| Filesystem || Path || User:Group || Permissions |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | container-executor | root:hadoop | --Sr-s--- |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<conf/container-executor.cfg>>> | root:hadoop | r-------- |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<yarn.nodemanager.local-dirs>>> | yarn:hadoop | drwxr-xr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<yarn.nodemanager.log-dirs>>> | yarn:hadoop | drwxr-xr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
|
||||
* Configurations for ResourceManager:
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.resourcemanager.keytab>>> | | |
|
||||
| | </etc/security/keytab/rm.service.keytab> | |
|
||||
| | | Kerberos keytab file for the ResourceManager. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.resourcemanager.principal>>> | rm/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the ResourceManager. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|
||||
* Configurations for NodeManager:
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.nodemanager.keytab>>> | </etc/security/keytab/nm.service.keytab> | |
|
||||
| | | Kerberos keytab file for the NodeManager. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.nodemanager.principal>>> | nm/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the NodeManager. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.nodemanager.container-executor.class>>> | | |
|
||||
| | <<<org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor>>> |
|
||||
| | | Use LinuxContainerExecutor. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.nodemanager.linux-container-executor.group>>> | <hadoop> | |
|
||||
| | | Unix group of the NodeManager. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|
||||
* <<<conf/mapred-site.xml>>>
|
||||
|
||||
* Configurations for MapReduce JobHistory Server:
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<mapreduce.jobhistory.address>>> | | |
|
||||
| | MapReduce JobHistory Server <host:port> | Default port is 10020. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<mapreduce.jobhistory.keytab>>> | |
|
||||
| | </etc/security/keytab/jhs.service.keytab> | |
|
||||
| | | Kerberos keytab file for the MapReduce JobHistory Server. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<mapreduce.jobhistory.principal>>> | jhs/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the MapReduce JobHistory Server. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|
||||
|
||||
* {Operating the Hadoop Cluster}
|
||||
|
||||
|
@ -44,8 +44,9 @@ Overview
|
||||
Generic Options
|
||||
|
||||
The following options are supported by {{dfsadmin}}, {{fs}}, {{fsck}},
|
||||
{{job}} and {{fetchdt}}. Applications should implement {{{some_useful_url}Tool}} to support
|
||||
{{{another_useful_url}GenericOptions}}.
|
||||
{{job}} and {{fetchdt}}. Applications should implement
|
||||
{{{../../api/org/apache/hadoop/util/Tool.html}Tool}} to support
|
||||
GenericOptions.
|
||||
|
||||
*------------------------------------------------+-----------------------------+
|
||||
|| GENERIC_OPTION || Description
|
||||
@ -123,7 +124,8 @@ User Commands
|
||||
|
||||
* <<<fsck>>>
|
||||
|
||||
Runs a HDFS filesystem checking utility. See {{Fsck}} for more info.
|
||||
Runs an HDFS filesystem checking utility.
|
||||
See {{{../hadoop-hdfs/HdfsUserGuide.html#fsck}fsck}} for more info.
|
||||
|
||||
Usage: <<<hadoop fsck [GENERIC_OPTIONS] <path> [-move | -delete | -openforwrite] [-files [-blocks [-locations | -racks]]]>>>
|
||||
|
||||
@ -149,7 +151,8 @@ User Commands
|
||||
|
||||
* <<<fetchdt>>>
|
||||
|
||||
Gets Delegation Token from a NameNode. See {{fetchdt}} for more info.
|
||||
Gets Delegation Token from a NameNode.
|
||||
See {{{../hadoop-hdfs/HdfsUserGuide.html#fetchdt}fetchdt}} for more info.
|
||||
|
||||
Usage: <<<hadoop fetchdt [GENERIC_OPTIONS] [--webservice <namenode_http_addr>] <path> >>>
|
||||
|
||||
@ -302,7 +305,8 @@ Administration Commands
|
||||
* <<<balancer>>>
|
||||
|
||||
Runs a cluster balancing utility. An administrator can simply press Ctrl-C
|
||||
to stop the rebalancing process. See Rebalancer for more details.
|
||||
to stop the rebalancing process. See
|
||||
{{{../hadoop-hdfs/HdfsUserGuide.html#Rebalancer}Rebalancer}} for more details.
|
||||
|
||||
Usage: <<<hadoop balancer [-threshold <threshold>]>>>
|
||||
|
||||
@ -445,7 +449,7 @@ Administration Commands
|
||||
* <<<namenode>>>
|
||||
|
||||
Runs the namenode. More info about the upgrade, rollback and finalize is
|
||||
at Upgrade Rollback
|
||||
at {{{../hadoop-hdfs/HdfsUserGuide.html#Upgrade_and_Rollback}Upgrade Rollback}}.
|
||||
|
||||
Usage: <<<hadoop namenode [-format] | [-upgrade] | [-rollback] | [-finalize] | [-importCheckpoint]>>>
|
||||
|
||||
@ -474,8 +478,9 @@ Administration Commands
|
||||
|
||||
* <<<secondarynamenode>>>
|
||||
|
||||
Runs the HDFS secondary namenode. See Secondary Namenode for more
|
||||
info.
|
||||
Runs the HDFS secondary namenode.
|
||||
See {{{../hadoop-hdfs/HdfsUserGuide.html#Secondary_NameNode}Secondary Namenode}}
|
||||
for more info.
|
||||
|
||||
Usage: <<<hadoop secondarynamenode [-checkpoint [force]] | [-geteditsize]>>>
|
||||
|
||||
|
@ -233,9 +233,10 @@ hand-in-hand to address this.
|
||||
|
||||
* In particular for MapReduce applications, the developer community will
|
||||
try our best to provide binary compatibility across major
|
||||
releases e.g. applications using org.apache.hadop.mapred.* APIs are
|
||||
supported compatibly across hadoop-1.x and hadoop-2.x. See
|
||||
{{{../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html}
|
||||
releases e.g. applications using org.apache.hadoop.mapred.
|
||||
|
||||
* APIs are supported compatibly across hadoop-1.x and hadoop-2.x. See
|
||||
{{{../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html}
|
||||
Compatibility for MapReduce applications between hadoop-1.x and hadoop-2.x}}
|
||||
for more details.
|
||||
|
||||
@ -248,13 +249,13 @@ hand-in-hand to address this.
|
||||
|
||||
* {{{../hadoop-hdfs/WebHDFS.html}WebHDFS}} - Stable
|
||||
|
||||
* {{{../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html}ResourceManager}}
|
||||
* {{{../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html}ResourceManager}}
|
||||
|
||||
* {{{../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html}NodeManager}}
|
||||
* {{{../../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html}NodeManager}}
|
||||
|
||||
* {{{../hadoop-yarn/hadoop-yarn-site/MapredAppMasterRest.html}MR Application Master}}
|
||||
* {{{../../hadoop-yarn/hadoop-yarn-site/MapredAppMasterRest.html}MR Application Master}}
|
||||
|
||||
* {{{../hadoop-yarn/hadoop-yarn-site/HistoryServerRest.html}History Server}}
|
||||
* {{{../../hadoop-yarn/hadoop-yarn-site/HistoryServerRest.html}History Server}}
|
||||
|
||||
*** Policy
|
||||
|
||||
@ -512,7 +513,8 @@ hand-in-hand to address this.
|
||||
{{{https://issues.apache.org/jira/browse/HADOOP-9517}HADOOP-9517}}
|
||||
|
||||
* Binary compatibility for MapReduce end-user applications between hadoop-1.x and hadoop-2.x -
|
||||
{{{../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html}MapReduce Compatibility between hadoop-1.x and hadoop-2.x}}
|
||||
{{{../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html}
|
||||
MapReduce Compatibility between hadoop-1.x and hadoop-2.x}}
|
||||
|
||||
* Annotations for interfaces as per interface classification
|
||||
schedule -
|
||||
|
@ -88,7 +88,7 @@ chgrp
|
||||
|
||||
Change group association of files. The user must be the owner of files, or
|
||||
else a super-user. Additional information is in the
|
||||
{{{betterurl}Permissions Guide}}.
|
||||
{{{../hadoop-hdfs/HdfsPermissionsGuide.html}Permissions Guide}}.
|
||||
|
||||
Options
|
||||
|
||||
@ -101,7 +101,7 @@ chmod
|
||||
Change the permissions of files. With -R, make the change recursively
|
||||
through the directory structure. The user must be the owner of the file, or
|
||||
else a super-user. Additional information is in the
|
||||
{{{betterurl}Permissions Guide}}.
|
||||
{{{../hadoop-hdfs/HdfsPermissionsGuide.html}Permissions Guide}}.
|
||||
|
||||
Options
|
||||
|
||||
@ -112,7 +112,7 @@ chown
|
||||
Usage: <<<hdfs dfs -chown [-R] [OWNER][:[GROUP]] URI [URI ]>>>
|
||||
|
||||
Change the owner of files. The user must be a super-user. Additional information
|
||||
is in the {{{betterurl}Permissions Guide}}.
|
||||
is in the {{{../hadoop-hdfs/HdfsPermissionsGuide.html}Permissions Guide}}.
|
||||
|
||||
Options
|
||||
|
||||
@ -210,8 +210,8 @@ expunge
|
||||
|
||||
Usage: <<<hdfs dfs -expunge>>>
|
||||
|
||||
Empty the Trash. Refer to the {{{betterurl}HDFS Architecture Guide}} for
|
||||
more information on the Trash feature.
|
||||
Empty the Trash. Refer to the {{{../hadoop-hdfs/HdfsDesign.html}
|
||||
HDFS Architecture Guide}} for more information on the Trash feature.
|
||||
|
||||
get
|
||||
|
||||
@ -439,7 +439,9 @@ test
|
||||
Options:
|
||||
|
||||
* The -e option will check to see if the file exists, returning 0 if true.
|
||||
|
||||
* The -z option will check to see if the file is zero length, returning 0 if true.
|
||||
|
||||
* The -d option will check to see if the path is a directory, returning 0 if true.
|
||||
|
||||
Example:
|
||||
|
@ -18,8 +18,6 @@
|
||||
|
||||
Hadoop Interface Taxonomy: Audience and Stability Classification
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Motivation
|
||||
|
@ -117,23 +117,19 @@ Native Libraries Guide
|
||||
* zlib-development package (stable version >= 1.2.0)
|
||||
|
||||
Once you installed the prerequisite packages use the standard hadoop
|
||||
build.xml file and pass along the compile.native flag (set to true) to
|
||||
build the native hadoop library:
|
||||
pom.xml file and pass along the native flag to build the native hadoop
|
||||
library:
|
||||
|
||||
----
|
||||
$ ant -Dcompile.native=true <target>
|
||||
$ mvn package -Pdist,native -DskipTests -Dtar
|
||||
----
|
||||
|
||||
You should see the newly-built library in:
|
||||
|
||||
----
|
||||
$ build/native/<platform>/lib
|
||||
$ hadoop-dist/target/hadoop-${project.version}/lib/native
|
||||
----
|
||||
|
||||
where <platform> is a combination of the system-properties:
|
||||
${os.name}-${os.arch}-${sun.arch.data.model} (for example,
|
||||
Linux-i386-32).
|
||||
|
||||
Please note the following:
|
||||
|
||||
* It is mandatory to install both the zlib and gzip development
|
||||
|
@ -0,0 +1,637 @@
|
||||
~~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~~ you may not use this file except in compliance with the License.
|
||||
~~ You may obtain a copy of the License at
|
||||
~~
|
||||
~~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~~
|
||||
~~ Unless required by applicable law or agreed to in writing, software
|
||||
~~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~~ See the License for the specific language governing permissions and
|
||||
~~ limitations under the License. See accompanying LICENSE file.
|
||||
|
||||
---
|
||||
Hadoop in Secure Mode
|
||||
---
|
||||
---
|
||||
${maven.build.timestamp}
|
||||
|
||||
%{toc|section=0|fromDepth=0|toDepth=3}
|
||||
|
||||
Hadoop in Secure Mode
|
||||
|
||||
* Introduction
|
||||
|
||||
This document describes how to configure authentication for Hadoop in
|
||||
secure mode.
|
||||
|
||||
By default Hadoop runs in non-secure mode in which no actual
|
||||
authentication is required.
|
||||
By configuring Hadoop to run in secure mode,
|
||||
each user and service needs to be authenticated by Kerberos
|
||||
in order to use Hadoop services.
|
||||
|
||||
Security features of Hadoop consist of
|
||||
{{{Authentication}authentication}},
|
||||
{{{./ServiceLevelAuth.html}service level authorization}},
|
||||
{{{./HttpAuthentication.html}authentication for Web consoles}}
|
||||
and {{{Data confidentiality}data confidentiality}}.
|
||||
|
||||
|
||||
* Authentication
|
||||
|
||||
** End User Accounts
|
||||
|
||||
When service level authentication is turned on,
|
||||
end users using Hadoop in secure mode need to be authenticated by Kerberos.
|
||||
The simplest way to do authentication is using <<<kinit>>> command of Kerberos.
|
||||
|
||||
** User Accounts for Hadoop Daemons
|
||||
|
||||
Ensure that HDFS and YARN daemons run as different Unix users,
|
||||
e.g. <<<hdfs>>> and <<<yarn>>>.
|
||||
Also, ensure that the MapReduce JobHistory server runs as
|
||||
a different user such as <<<mapred>>>.
|
||||
|
||||
It's recommended to have them share a Unix group, e.g. <<<hadoop>>>.
|
||||
See also "{{Mapping from user to group}}" for group management.
|
||||
|
||||
*---------------+----------------------------------------------------------------------+
|
||||
|| User:Group || Daemons |
|
||||
*---------------+----------------------------------------------------------------------+
|
||||
| hdfs:hadoop | NameNode, Secondary NameNode, JournalNode, DataNode |
|
||||
*---------------+----------------------------------------------------------------------+
|
||||
| yarn:hadoop | ResourceManager, NodeManager |
|
||||
*---------------+----------------------------------------------------------------------+
|
||||
| mapred:hadoop | MapReduce JobHistory Server |
|
||||
*---------------+----------------------------------------------------------------------+
|
||||
|
||||
** Kerberos principals for Hadoop Daemons and Users
|
||||
|
||||
For running hadoop service daemons in Hadoop in secure mode,
|
||||
Kerberos principals are required.
|
||||
Each service reads authentication information saved in a keytab file with appropriate permission.
|
||||
|
||||
HTTP web-consoles should be served by a principal different from the one used for RPC.
|
||||
|
||||
The subsections below show examples of credentials for Hadoop services.
|
||||
|
||||
*** HDFS
|
||||
|
||||
The NameNode keytab file, on the NameNode host, should look like the
|
||||
following:
|
||||
|
||||
----
|
||||
$ klist -e -k -t /etc/security/keytab/nn.service.keytab
|
||||
Keytab name: FILE:/etc/security/keytab/nn.service.keytab
|
||||
KVNO Timestamp Principal
|
||||
4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
----
|
||||
|
||||
The Secondary NameNode keytab file, on that host, should look like the
|
||||
following:
|
||||
|
||||
----
|
||||
$ klist -e -k -t /etc/security/keytab/sn.service.keytab
|
||||
Keytab name: FILE:/etc/security/keytab/sn.service.keytab
|
||||
KVNO Timestamp Principal
|
||||
4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
----
|
||||
|
||||
The DataNode keytab file, on each host, should look like the following:
|
||||
|
||||
----
|
||||
$ klist -e -k -t /etc/security/keytab/dn.service.keytab
|
||||
Keytab name: FILE:/etc/security/keytab/dn.service.keytab
|
||||
KVNO Timestamp Principal
|
||||
4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
----
|
||||
|
||||
*** YARN
|
||||
|
||||
The ResourceManager keytab file, on the ResourceManager host, should look
|
||||
like the following:
|
||||
|
||||
----
|
||||
$ klist -e -k -t /etc/security/keytab/rm.service.keytab
|
||||
Keytab name: FILE:/etc/security/keytab/rm.service.keytab
|
||||
KVNO Timestamp Principal
|
||||
4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
----
|
||||
|
||||
The NodeManager keytab file, on each host, should look like the following:
|
||||
|
||||
----
|
||||
$ klist -e -k -t /etc/security/keytab/nm.service.keytab
|
||||
Keytab name: FILE:/etc/security/keytab/nm.service.keytab
|
||||
KVNO Timestamp Principal
|
||||
4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
----
|
||||
|
||||
*** MapReduce JobHistory Server
|
||||
|
||||
The MapReduce JobHistory Server keytab file, on that host, should look
|
||||
like the following:
|
||||
|
||||
----
|
||||
$ klist -e -k -t /etc/security/keytab/jhs.service.keytab
|
||||
Keytab name: FILE:/etc/security/keytab/jhs.service.keytab
|
||||
KVNO Timestamp Principal
|
||||
4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC)
|
||||
4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5)
|
||||
----
|
||||
|
||||
** Mapping from Kerberos principal to OS user account
|
||||
|
||||
Hadoop maps Kerberos principal to OS user account using
|
||||
the rule specified by <<<hadoop.security.auth_to_local>>>
|
||||
which works in the same way as the <<<auth_to_local>>> in
|
||||
{{{http://web.mit.edu/Kerberos/krb5-latest/doc/admin/conf_files/krb5_conf.html}Kerberos configuration file (krb5.conf)}}.
|
||||
|
||||
By default, it picks the first component of principal name as a user name
|
||||
if the realm matches the <<<default_realm>>> (usually defined in /etc/krb5.conf).
|
||||
For example, <<<host/full.qualified.domain.name@REALM.TLD>>> is mapped to <<<host>>>
|
||||
by default rule.
|
||||
|
||||
** Mapping from user to group
|
||||
|
||||
Though files on HDFS are associated to owner and group,
|
||||
Hadoop does not have the definition of group by itself.
|
||||
Mapping from user to group is done by OS or LDAP.
|
||||
|
||||
You can change the way of mapping by
|
||||
specifying the name of mapping provider as a value of
|
||||
<<<hadoop.security.group.mapping>>>.
|
||||
See {{{../hadoop-hdfs/HdfsPermissionsGuide.html}HDFS Permissions Guide}} for details.
|
||||
|
||||
Practically you need to manage SSO environment using Kerberos with LDAP
|
||||
for Hadoop in secure mode.
|
||||
|
||||
** Proxy user
|
||||
|
||||
Some products such as Apache Oozie which access the services of Hadoop
|
||||
on behalf of end users need to be able to impersonate end users.
|
||||
You can configure proxy user using properties
|
||||
<<<hadoop.proxyuser.${superuser}.hosts>>> and <<<hadoop.proxyuser.${superuser}.groups>>>.
|
||||
|
||||
For example, by specifying as below in core-site.xml,
|
||||
user named <<<oozie>>> accessing from any host
|
||||
can impersonate any user belonging to any group.
|
||||
|
||||
----
|
||||
<property>
|
||||
<name>hadoop.proxyuser.oozie.hosts</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
<property>
|
||||
<name>hadoop.proxyuser.oozie.groups</name>
|
||||
<value>*</value>
|
||||
</property>
|
||||
----
|
||||
|
||||
** Secure DataNode
|
||||
|
||||
Because the data transfer protocol of DataNode
|
||||
does not use the RPC framework of Hadoop,
|
||||
DataNode must authenticate itself by
|
||||
using privileged ports which are specified by
|
||||
<<<dfs.datanode.address>>> and <<<dfs.datanode.http.address>>>.
|
||||
This authentication is based on the assumption
|
||||
that the attacker won't be able to get root privileges.
|
||||
|
||||
When you execute <<<hdfs datanode>>> command as root,
|
||||
server process binds privileged port at first,
|
||||
then drops privilege and runs as the user account specified by
|
||||
<<<HADOOP_SECURE_DN_USER>>>.
|
||||
This startup process uses jsvc installed to <<<JSVC_HOME>>>.
|
||||
You must specify <<<HADOOP_SECURE_DN_USER>>> and <<<JSVC_HOME>>>
|
||||
as environment variables on start up (in hadoop-env.sh).
|
||||
|
||||
|
||||
* Data confidentiality
|
||||
|
||||
** Data Encryption on RPC
|
||||
|
||||
The data transferred between Hadoop services and clients can be encrypted on the wire.
|
||||
Setting <<<hadoop.rpc.protection>>> to <<<"privacy">>> in the core-site.xml
|
||||
activates data encryption.
|
||||
|
||||
** Data Encryption on Block data transfer
|
||||
|
||||
You need to set <<<dfs.encrypt.data.transfer>>> to <<<"true">>> in the hdfs-site.xml
|
||||
in order to activate data encryption for data transfer protocol of DataNode.
|
||||
|
||||
** Data Encryption on HTTP
|
||||
|
||||
Data transfer between Web-console and clients is protected by using SSL(HTTPS).
|
||||
|
||||
|
||||
* Configuration
|
||||
|
||||
** Permissions for both HDFS and local fileSystem paths
|
||||
|
||||
The following table lists various paths on HDFS and local filesystems (on
|
||||
all nodes) and recommended permissions:
|
||||
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
|| Filesystem || Path || User:Group || Permissions |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<dfs.namenode.name.dir>>> | hdfs:hadoop | drwx------ |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<dfs.datanode.data.dir>>> | hdfs:hadoop | drwx------ |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | $HADOOP_LOG_DIR | hdfs:hadoop | drwxrwxr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | $YARN_LOG_DIR | yarn:hadoop | drwxrwxr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<yarn.nodemanager.local-dirs>>> | yarn:hadoop | drwxr-xr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<yarn.nodemanager.log-dirs>>> | yarn:hadoop | drwxr-xr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | container-executor | root:hadoop | --Sr-s--- |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<conf/container-executor.cfg>>> | root:hadoop | r-------- |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| hdfs | / | hdfs:hadoop | drwxr-xr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| hdfs | /tmp | hdfs:hadoop | drwxrwxrwxt |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| hdfs | /user | hdfs:hadoop | drwxr-xr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| hdfs | <<<yarn.nodemanager.remote-app-log-dir>>> | yarn:hadoop | drwxrwxrwxt |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| hdfs | <<<mapreduce.jobhistory.intermediate-done-dir>>> | mapred:hadoop | |
|
||||
| | | | drwxrwxrwxt |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| hdfs | <<<mapreduce.jobhistory.done-dir>>> | mapred:hadoop | |
|
||||
| | | | drwxr-x--- |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
|
||||
** Common Configurations
|
||||
|
||||
In order to turn on RPC authentication in hadoop,
|
||||
set the value of <<<hadoop.security.authentication>>> property to
|
||||
<<<"kerberos">>>, and set security related settings listed below appropriately.
|
||||
|
||||
The following properties should be in the <<<core-site.xml>>> of all the
|
||||
nodes in the cluster.
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<hadoop.security.authentication>>> | <kerberos> | |
|
||||
| | | <<<simple>>> : No authentication. (default) \
|
||||
| | | <<<kerberos>>> : Enable authentication by Kerberos. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<hadoop.security.authorization>>> | <true> | |
|
||||
| | | Enable {{{./ServiceLevelAuth.html}RPC service-level authorization}}. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<hadoop.rpc.protection>>> | <authentication> |
|
||||
| | | <authentication> : authentication only (default) \
|
||||
| | | <integrity> : integrity check in addition to authentication \
|
||||
| | | <privacy> : data encryption in addition to integrity |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<hadoop.security.auth_to_local>>> | | |
|
||||
| | <<<RULE:>>><exp1>\
|
||||
| | <<<RULE:>>><exp2>\
|
||||
| | <...>\
|
||||
| | DEFAULT |
|
||||
| | | The value is a string containing newline characters.
|
||||
| | | See
|
||||
| | | {{{http://web.mit.edu/Kerberos/krb5-latest/doc/admin/conf_files/krb5_conf.html}Kerberos documentation}}
|
||||
| | | for format for <exp>.
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<hadoop.proxyuser.>>><superuser><<<.hosts>>> | | |
|
||||
| | | comma separated hosts from which <superuser> is allowed to perform impersonation. |
|
||||
| | | <<<*>>> means wildcard. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<hadoop.proxyuser.>>><superuser><<<.groups>>> | | |
|
||||
| | | comma separated groups to which users impersonated by <superuser> belong. |
|
||||
| | | <<<*>>> means wildcard. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
Configuration for <<<conf/core-site.xml>>>
|
||||
|
||||
** NameNode
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.block.access.token.enable>>> | <true> | |
|
||||
| | | Enable HDFS block access tokens for secure operations. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.https.enable>>> | <true> | |
|
||||
| | | This value is deprecated. Use dfs.http.policy |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.http.policy>>> | <HTTP_ONLY> or <HTTPS_ONLY> or <HTTP_AND_HTTPS> | |
|
||||
| | | HTTPS_ONLY turns off http access |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.https-address>>> | <nn_host_fqdn:50470> | |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.https.port>>> | <50470> | |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.keytab.file>>> | </etc/security/keytab/nn.service.keytab> | |
|
||||
| | | Kerberos keytab file for the NameNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.kerberos.principal>>> | nn/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the NameNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.kerberos.https.principal>>> | host/_HOST@REALM.TLD | |
|
||||
| | | HTTPS Kerberos principal name for the NameNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
Configuration for <<<conf/hdfs-site.xml>>>
|
||||
|
||||
** Secondary NameNode
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.secondary.http-address>>> | <c_nn_host_fqdn:50090> | |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.secondary.https-port>>> | <50470> | |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.secondary.keytab.file>>> | | |
|
||||
| | </etc/security/keytab/sn.service.keytab> | |
|
||||
| | | Kerberos keytab file for the Secondary NameNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.secondary.kerberos.principal>>> | sn/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the Secondary NameNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.namenode.secondary.kerberos.https.principal>>> | | |
|
||||
| | host/_HOST@REALM.TLD | |
|
||||
| | | HTTPS Kerberos principal name for the Secondary NameNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
Configuration for <<<conf/hdfs-site.xml>>>
|
||||
|
||||
** DataNode
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.datanode.data.dir.perm>>> | 700 | |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.datanode.address>>> | <0.0.0.0:1004> | |
|
||||
| | | Secure DataNode must use privileged port |
|
||||
| | | in order to assure that the server was started securely. |
|
||||
| | | This means that the server must be started via jsvc. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.datanode.http.address>>> | <0.0.0.0:1006> | |
|
||||
| | | Secure DataNode must use privileged port |
|
||||
| | | in order to assure that the server was started securely. |
|
||||
| | | This means that the server must be started via jsvc. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.datanode.https.address>>> | <0.0.0.0:50470> | |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.datanode.keytab.file>>> | </etc/security/keytab/dn.service.keytab> | |
|
||||
| | | Kerberos keytab file for the DataNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.datanode.kerberos.principal>>> | dn/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the DataNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.datanode.kerberos.https.principal>>> | | |
|
||||
| | host/_HOST@REALM.TLD | |
|
||||
| | | HTTPS Kerberos principal name for the DataNode. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.encrypt.data.transfer>>> | <false> | |
|
||||
| | | set to <<<true>>> when using data encryption |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
Configuration for <<<conf/hdfs-site.xml>>>
|
||||
|
||||
|
||||
** WebHDFS
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.webhdfs.enabled>>> | <true> | |
|
||||
| | | Enable security on WebHDFS. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.web.authentication.kerberos.principal>>> | http/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the WebHDFS. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<dfs.web.authentication.kerberos.keytab>>> | </etc/security/keytab/http.service.keytab> | |
|
||||
| | | Kerberos keytab file for WebHDFS. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
Configuration for <<<conf/hdfs-site.xml>>>
|
||||
|
||||
|
||||
** ResourceManager
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.resourcemanager.keytab>>> | | |
|
||||
| | </etc/security/keytab/rm.service.keytab> | |
|
||||
| | | Kerberos keytab file for the ResourceManager. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.resourcemanager.principal>>> | rm/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the ResourceManager. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
Configuration for <<<conf/yarn-site.xml>>>
|
||||
|
||||
** NodeManager
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.nodemanager.keytab>>> | </etc/security/keytab/nm.service.keytab> | |
|
||||
| | | Kerberos keytab file for the NodeManager. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.nodemanager.principal>>> | nm/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the NodeManager. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.nodemanager.container-executor.class>>> | | |
|
||||
| | <<<org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor>>> |
|
||||
| | | Use LinuxContainerExecutor. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.nodemanager.linux-container-executor.group>>> | <hadoop> | |
|
||||
| | | Unix group of the NodeManager. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.nodemanager.linux-container-executor.path>>> | </path/to/bin/container-executor> | |
|
||||
| | | The path to the executable of Linux container executor. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
Configuration for <<<conf/yarn-site.xml>>>
|
||||
|
||||
** Configuration for WebAppProxy
|
||||
|
||||
The <<<WebAppProxy>>> provides a proxy between the web applications
|
||||
exported by an application and an end user. If security is enabled
|
||||
it will warn users before accessing a potentially unsafe web application.
|
||||
Authentication and authorization using the proxy is handled just like
|
||||
any other privileged web application.
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.web-proxy.address>>> | | |
|
||||
| | <<<WebAppProxy>>> host:port for proxy to AM web apps. | |
|
||||
| | | <host:port> if this is the same as <<<yarn.resourcemanager.webapp.address>>>|
|
||||
| | | or it is not defined then the <<<ResourceManager>>> will run the proxy|
|
||||
| | | otherwise a standalone proxy server will need to be launched.|
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.web-proxy.keytab>>> | | |
|
||||
| | </etc/security/keytab/web-app.service.keytab> | |
|
||||
| | | Kerberos keytab file for the WebAppProxy. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.web-proxy.principal>>> | wap/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the WebAppProxy. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
Configuration for <<<conf/yarn-site.xml>>>
|
||||
|
||||
** LinuxContainerExecutor
|
||||
|
||||
A <<<ContainerExecutor>>> used by YARN framework which defines how any
|
||||
<container> is launched and controlled.
|
||||
|
||||
The following are available in Hadoop YARN:
|
||||
|
||||
*--------------------------------------+--------------------------------------+
|
||||
|| ContainerExecutor || Description |
|
||||
*--------------------------------------+--------------------------------------+
|
||||
| <<<DefaultContainerExecutor>>> | |
|
||||
| | The default executor which YARN uses to manage container execution. |
|
||||
| | The container process has the same Unix user as the NodeManager. |
|
||||
*--------------------------------------+--------------------------------------+
|
||||
| <<<LinuxContainerExecutor>>> | |
|
||||
| | Supported only on GNU/Linux, this executor runs the containers as either the |
|
||||
| | YARN user who submitted the application (when full security is enabled) or |
|
||||
| | as a dedicated user (defaults to nobody) when full security is not enabled. |
|
||||
| | When full security is enabled, this executor requires all user accounts to be |
|
||||
| | created on the cluster nodes where the containers are launched. It uses |
|
||||
| | a <setuid> executable that is included in the Hadoop distribution. |
|
||||
| | The NodeManager uses this executable to launch and kill containers. |
|
||||
| | The setuid executable switches to the user who has submitted the |
|
||||
| | application and launches or kills the containers. For maximum security, |
|
||||
| | this executor sets up restricted permissions and user/group ownership of |
|
||||
| | local files and directories used by the containers such as the shared |
|
||||
| | objects, jars, intermediate files, log files etc. Particularly note that, |
|
||||
| | because of this, except the application owner and NodeManager, no other |
|
||||
| | user can access any of the local files/directories including those |
|
||||
| | localized as part of the distributed cache. |
|
||||
*--------------------------------------+--------------------------------------+
|
||||
|
||||
To build the LinuxContainerExecutor executable run:
|
||||
|
||||
----
|
||||
$ mvn package -Dcontainer-executor.conf.dir=/etc/hadoop/
|
||||
----
|
||||
|
||||
The path passed in <<<-Dcontainer-executor.conf.dir>>> should be the
|
||||
path on the cluster nodes where a configuration file for the setuid
|
||||
executable should be located. The executable should be installed in
|
||||
$HADOOP_YARN_HOME/bin.
|
||||
|
||||
The executable must have specific permissions: 6050 or --Sr-s---
|
||||
permissions user-owned by <root> (super-user) and group-owned by a
|
||||
special group (e.g. <<<hadoop>>>) of which the NodeManager Unix user is
|
||||
the group member and no ordinary application user is. If any application
|
||||
user belongs to this special group, security will be compromised. This
|
||||
special group name should be specified for the configuration property
|
||||
<<<yarn.nodemanager.linux-container-executor.group>>> in both
|
||||
<<<conf/yarn-site.xml>>> and <<<conf/container-executor.cfg>>>.
|
||||
|
||||
For example, let's say that the NodeManager is run as user <yarn> who is
|
||||
part of the groups users and <hadoop>, any of them being the primary group.
|
||||
Let also be that <users> has both <yarn> and another user
|
||||
(application submitter) <alice> as its members, and <alice> does not
|
||||
belong to <hadoop>. Going by the above description, the setuid/setgid
|
||||
executable should be set 6050 or --Sr-s--- with user-owner as <yarn> and
|
||||
group-owner as <hadoop> which has <yarn> as its member (and not <users>
|
||||
which has <alice> also as its member besides <yarn>).
|
||||
|
||||
The LinuxTaskController requires that paths including and leading up to
|
||||
the directories specified in <<<yarn.nodemanager.local-dirs>>> and
|
||||
<<<yarn.nodemanager.log-dirs>>> to be set 755 permissions as described
|
||||
above in the table on permissions on directories.
|
||||
|
||||
* <<<conf/container-executor.cfg>>>
|
||||
|
||||
The executable requires a configuration file called
|
||||
<<<container-executor.cfg>>> to be present in the configuration
|
||||
directory passed to the mvn target mentioned above.
|
||||
|
||||
The configuration file must be owned by the user running NodeManager
|
||||
(user <<<yarn>>> in the above example), group-owned by anyone and
|
||||
should have the permissions 0400 or r--------.
|
||||
|
||||
The executable requires following configuration items to be present
|
||||
in the <<<conf/container-executor.cfg>>> file. The items should be
|
||||
mentioned as simple key=value pairs, one per-line:
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<yarn.nodemanager.linux-container-executor.group>>> | <hadoop> | |
|
||||
| | | Unix group of the NodeManager. The group owner of the |
|
||||
| | |<container-executor> binary should be this group. Should be same as the |
|
||||
| | | value with which the NodeManager is configured. This configuration is |
|
||||
| | | required for validating the secure access of the <container-executor> |
|
||||
| | | binary. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<banned.users>>> | hdfs,yarn,mapred,bin | Banned users. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<allowed.system.users>>> | foo,bar | Allowed system users. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<min.user.id>>> | 1000 | Prevent other super-users. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
Configuration for <<<conf/yarn-site.xml>>>
|
||||
|
||||
To re-cap, here are the local file-system permissions required for the
|
||||
various paths related to the <<<LinuxContainerExecutor>>>:
|
||||
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
|| Filesystem || Path || User:Group || Permissions |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | container-executor | root:hadoop | --Sr-s--- |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<conf/container-executor.cfg>>> | root:hadoop | r-------- |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<yarn.nodemanager.local-dirs>>> | yarn:hadoop | drwxr-xr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
| local | <<<yarn.nodemanager.log-dirs>>> | yarn:hadoop | drwxr-xr-x |
|
||||
*-------------------+-------------------+------------------+------------------+
|
||||
|
||||
** MapReduce JobHistory Server
|
||||
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
|| Parameter || Value || Notes |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<mapreduce.jobhistory.address>>> | | |
|
||||
| | MapReduce JobHistory Server <host:port> | Default port is 10020. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<mapreduce.jobhistory.keytab>>> | | |
|
||||
| | </etc/security/keytab/jhs.service.keytab> | |
|
||||
| | | Kerberos keytab file for the MapReduce JobHistory Server. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
| <<<mapreduce.jobhistory.principal>>> | jhs/_HOST@REALM.TLD | |
|
||||
| | | Kerberos principal name for the MapReduce JobHistory Server. |
|
||||
*-------------------------+-------------------------+------------------------+
|
||||
Configuration for <<<conf/mapred-site.xml>>>
|
@ -29,8 +29,10 @@ Service Level Authorization Guide
|
||||
|
||||
Make sure Hadoop is installed, configured and setup correctly. For more
|
||||
information see:
|
||||
* Single Node Setup for first-time users.
|
||||
* Cluster Setup for large, distributed clusters.
|
||||
|
||||
* {{{./SingleCluster.html}Single Node Setup}} for first-time users.
|
||||
|
||||
* {{{./ClusterSetup.html}Cluster Setup}} for large, distributed clusters.
|
||||
|
||||
* Overview
|
||||
|
||||
|
@ -18,8 +18,6 @@
|
||||
|
||||
Hadoop MapReduce Next Generation - Setting up a Single Node Cluster.
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Mapreduce Tarball
|
||||
@ -32,7 +30,8 @@ $ mvn clean install -DskipTests
|
||||
$ cd hadoop-mapreduce-project
|
||||
$ mvn clean install assembly:assembly -Pnative
|
||||
+---+
|
||||
<<NOTE:>> You will need protoc 2.5.0 installed.
|
||||
<<NOTE:>> You will need {{{http://code.google.com/p/protobuf}protoc 2.5.0}}
|
||||
installed.
|
||||
|
||||
To ignore the native builds in mapreduce you can omit the <<<-Pnative>>> argument
|
||||
for maven. The tarball should be available in <<<target/>>> directory.
|
||||
|
@ -28,10 +28,30 @@ public class TestVersionUtil {
|
||||
// Equal versions are equal.
|
||||
assertEquals(0, VersionUtil.compareVersions("2.0.0", "2.0.0"));
|
||||
assertEquals(0, VersionUtil.compareVersions("2.0.0a", "2.0.0a"));
|
||||
assertEquals(0, VersionUtil.compareVersions("1", "1"));
|
||||
assertEquals(0, VersionUtil.compareVersions(
|
||||
"2.0.0-SNAPSHOT", "2.0.0-SNAPSHOT"));
|
||||
|
||||
|
||||
assertEquals(0, VersionUtil.compareVersions("1", "1"));
|
||||
assertEquals(0, VersionUtil.compareVersions("1", "1.0"));
|
||||
assertEquals(0, VersionUtil.compareVersions("1", "1.0.0"));
|
||||
|
||||
assertEquals(0, VersionUtil.compareVersions("1.0", "1"));
|
||||
assertEquals(0, VersionUtil.compareVersions("1.0", "1.0"));
|
||||
assertEquals(0, VersionUtil.compareVersions("1.0", "1.0.0"));
|
||||
|
||||
assertEquals(0, VersionUtil.compareVersions("1.0.0", "1"));
|
||||
assertEquals(0, VersionUtil.compareVersions("1.0.0", "1.0"));
|
||||
assertEquals(0, VersionUtil.compareVersions("1.0.0", "1.0.0"));
|
||||
|
||||
assertEquals(0, VersionUtil.compareVersions("1.0.0-alpha-1", "1.0.0-a1"));
|
||||
assertEquals(0, VersionUtil.compareVersions("1.0.0-alpha-2", "1.0.0-a2"));
|
||||
assertEquals(0, VersionUtil.compareVersions("1.0.0-alpha1", "1.0.0-alpha-1"));
|
||||
|
||||
assertEquals(0, VersionUtil.compareVersions("1a0", "1.0.0-alpha-0"));
|
||||
assertEquals(0, VersionUtil.compareVersions("1a0", "1-a0"));
|
||||
assertEquals(0, VersionUtil.compareVersions("1.a0", "1-a0"));
|
||||
assertEquals(0, VersionUtil.compareVersions("1.a0", "1.0.0-alpha-0"));
|
||||
|
||||
// Assert that lower versions are lower, and higher versions are higher.
|
||||
assertExpectedValues("1", "2.0.0");
|
||||
assertExpectedValues("1.0.0", "2");
|
||||
@ -51,15 +71,27 @@ public class TestVersionUtil {
|
||||
assertExpectedValues("1.0.2a", "1.0.2ab");
|
||||
assertExpectedValues("1.0.0a1", "1.0.0a2");
|
||||
assertExpectedValues("1.0.0a2", "1.0.0a10");
|
||||
// The 'a' in "1.a" is not followed by digit, thus not treated as "alpha",
|
||||
// and treated larger than "1.0", per maven's ComparableVersion class
|
||||
// implementation.
|
||||
assertExpectedValues("1.0", "1.a");
|
||||
assertExpectedValues("1.0", "1.a0");
|
||||
//The 'a' in "1.a0" is followed by digit, thus treated as "alpha-<digit>"
|
||||
assertExpectedValues("1.a0", "1.0");
|
||||
assertExpectedValues("1a0", "1.0");
|
||||
assertExpectedValues("1.0.1-alpha-1", "1.0.1-alpha-2");
|
||||
assertExpectedValues("1.0.1-beta-1", "1.0.1-beta-2");
|
||||
|
||||
// Snapshot builds precede their eventual releases.
|
||||
assertExpectedValues("1.0-SNAPSHOT", "1.0");
|
||||
assertExpectedValues("1.0", "1.0.0-SNAPSHOT");
|
||||
assertExpectedValues("1.0.0-SNAPSHOT", "1.0");
|
||||
assertExpectedValues("1.0.0-SNAPSHOT", "1.0.0");
|
||||
assertExpectedValues("1.0.0", "1.0.1-SNAPSHOT");
|
||||
assertExpectedValues("1.0.1-SNAPSHOT", "1.0.1");
|
||||
assertExpectedValues("1.0.1-SNAPSHOT", "1.0.2");
|
||||
|
||||
assertExpectedValues("1.0.1-alpha-1", "1.0.1-SNAPSHOT");
|
||||
assertExpectedValues("1.0.1-beta-1", "1.0.1-SNAPSHOT");
|
||||
assertExpectedValues("1.0.1-beta-2", "1.0.1-SNAPSHOT");
|
||||
}
|
||||
|
||||
private static void assertExpectedValues(String lower, String higher) {
|
||||
|
@ -504,6 +504,9 @@ Release 2.4.0 - UNRELEASED
|
||||
HDFS-5788. listLocatedStatus response can be very large. (Nathan Roberts
|
||||
via kihwal)
|
||||
|
||||
HDFS-5781. Use an array to record the mapping between FSEditLogOpCode and
|
||||
the corresponding byte value. (jing9)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)
|
||||
@ -979,6 +982,9 @@ Release 2.3.0 - UNRELEASED
|
||||
HDFS-5677. Need error checking for HA cluster configuration.
|
||||
(Vincent Sheffer via cos)
|
||||
|
||||
HDFS-5825. Use FileUtils.copyFile() to implement DFSTestUtils.copyFile().
|
||||
(Haohui Mai via Arpit Agarwal)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
@ -1142,6 +1148,15 @@ Release 2.3.0 - UNRELEASED
|
||||
HDFS-5343. When cat command is issued on snapshot files getting unexpected result.
|
||||
(Sathish via umamahesh)
|
||||
|
||||
HDFS-5297. Fix dead links in HDFS site documents. (Akira Ajisaka via
|
||||
Arpit Agarwal)
|
||||
|
||||
HDFS-5830. WebHdfsFileSystem.getFileBlockLocations throws
|
||||
IllegalArgumentException when accessing another cluster. (Yongjun Zhang via
|
||||
Colin Patrick McCabe)
|
||||
|
||||
HDFS-5833. Fix SecondaryNameNode javadoc. (Bangtao Zhou via Arpit Agarwal)
|
||||
|
||||
Release 2.2.0 - 2013-10-13
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -98,9 +98,8 @@ public class LocatedBlock {
|
||||
}
|
||||
this.storageIDs = storageIDs;
|
||||
this.storageTypes = storageTypes;
|
||||
Preconditions.checkArgument(cachedLocs != null,
|
||||
"cachedLocs should not be null, use a different constructor");
|
||||
if (cachedLocs.length == 0) {
|
||||
|
||||
if (cachedLocs == null || cachedLocs.length == 0) {
|
||||
this.cachedLocs = EMPTY_LOCS;
|
||||
} else {
|
||||
this.cachedLocs = cachedLocs;
|
||||
|
@ -69,7 +69,7 @@ public enum FSEditLogOpCodes {
|
||||
OP_MODIFY_CACHE_DIRECTIVE ((byte) 39),
|
||||
OP_UPGRADE_MARKER ((byte) 40),
|
||||
|
||||
// Note that fromByte(..) depends on OP_INVALID being at the last position.
|
||||
// Note that the current range of the valid OP code is 0~127
|
||||
OP_INVALID ((byte) -1);
|
||||
|
||||
private final byte opCode;
|
||||
@ -92,7 +92,22 @@ public enum FSEditLogOpCodes {
|
||||
return opCode;
|
||||
}
|
||||
|
||||
private static final FSEditLogOpCodes[] VALUES = FSEditLogOpCodes.values();
|
||||
private static FSEditLogOpCodes[] VALUES;
|
||||
|
||||
static {
|
||||
byte max = 0;
|
||||
for (FSEditLogOpCodes code : FSEditLogOpCodes.values()) {
|
||||
if (code.getOpCode() > max) {
|
||||
max = code.getOpCode();
|
||||
}
|
||||
}
|
||||
VALUES = new FSEditLogOpCodes[max + 1];
|
||||
for (FSEditLogOpCodes code : FSEditLogOpCodes.values()) {
|
||||
if (code.getOpCode() >= 0) {
|
||||
VALUES[code.getOpCode()] = code;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts byte to FSEditLogOpCodes enum value
|
||||
@ -101,12 +116,9 @@ public enum FSEditLogOpCodes {
|
||||
* @return enum with byte value of opCode
|
||||
*/
|
||||
public static FSEditLogOpCodes fromByte(byte opCode) {
|
||||
if (opCode == -1) {
|
||||
return OP_INVALID;
|
||||
}
|
||||
if (opCode >= 0 && opCode < OP_INVALID.ordinal()) {
|
||||
if (opCode >= 0 && opCode < VALUES.length) {
|
||||
return VALUES[opCode];
|
||||
}
|
||||
return null;
|
||||
return opCode == -1 ? OP_INVALID : null;
|
||||
}
|
||||
}
|
||||
|
@ -90,7 +90,7 @@ import com.google.common.collect.ImmutableList;
|
||||
* The Secondary NameNode is a daemon that periodically wakes
|
||||
* up (determined by the schedule specified in the configuration),
|
||||
* triggers a periodic checkpoint and then goes back to sleep.
|
||||
* The Secondary NameNode uses the ClientProtocol to talk to the
|
||||
* The Secondary NameNode uses the NamenodeProtocol to talk to the
|
||||
* primary NameNode.
|
||||
*
|
||||
**********************************************************/
|
||||
|
@ -19,8 +19,6 @@
|
||||
|
||||
HDFS Federation
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
This guide provides an overview of the HDFS Federation feature and
|
||||
|
@ -18,8 +18,6 @@
|
||||
|
||||
HDFS High Availability
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* {Purpose}
|
||||
|
@ -18,8 +18,6 @@
|
||||
|
||||
HDFS High Availability Using the Quorum Journal Manager
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* {Purpose}
|
||||
|
@ -20,8 +20,6 @@
|
||||
|
||||
Offline Edits Viewer Guide
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Overview
|
||||
|
@ -18,8 +18,6 @@
|
||||
|
||||
Offline Image Viewer Guide
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Overview
|
||||
@ -64,9 +62,9 @@ Offline Image Viewer Guide
|
||||
but no data recorded. The default record delimiter is a tab, but
|
||||
this may be changed via the -delimiter command line argument. This
|
||||
processor is designed to create output that is easily analyzed by
|
||||
other tools, such as [36]Apache Pig. See the [37]Analyzing Results
|
||||
section for further information on using this processor to analyze
|
||||
the contents of fsimage files.
|
||||
other tools, such as {{{http://pig.apache.org}Apache Pig}}. See
|
||||
the {{Analyzing Results}} section for further information on using
|
||||
this processor to analyze the contents of fsimage files.
|
||||
|
||||
[[4]] XML creates an XML document of the fsimage and includes all of the
|
||||
information within the fsimage, similar to the lsr processor. The
|
||||
|
@ -18,8 +18,6 @@
|
||||
|
||||
HDFS Permissions Guide
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Overview
|
||||
@ -55,8 +53,10 @@ HDFS Permissions Guide
|
||||
|
||||
* If the user name matches the owner of foo, then the owner
|
||||
permissions are tested;
|
||||
|
||||
* Else if the group of foo matches any member of the groups list,
|
||||
then the group permissions are tested;
|
||||
|
||||
* Otherwise the other permissions of foo are tested.
|
||||
|
||||
If a permissions check fails, the client operation fails.
|
||||
|
@ -18,8 +18,6 @@
|
||||
|
||||
HDFS Quotas Guide
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Overview
|
||||
|
@ -108,9 +108,11 @@ HDFS Users Guide
|
||||
The following documents describe how to install and set up a Hadoop
|
||||
cluster:
|
||||
|
||||
* {{Single Node Setup}} for first-time users.
|
||||
* {{{../hadoop-common/SingleCluster.html}Single Node Setup}}
|
||||
for first-time users.
|
||||
|
||||
* {{Cluster Setup}} for large, distributed clusters.
|
||||
* {{{../hadoop-common/ClusterSetup.html}Cluster Setup}}
|
||||
for large, distributed clusters.
|
||||
|
||||
The rest of this document assumes the user is able to set up and run a
|
||||
HDFS with at least one DataNode. For the purpose of this document, both
|
||||
@ -136,7 +138,8 @@ HDFS Users Guide
|
||||
for a command. These commands support most of the normal files system
|
||||
operations like copying files, changing file permissions, etc. It also
|
||||
supports a few HDFS specific operations like changing replication of
|
||||
files. For more information see {{{File System Shell Guide}}}.
|
||||
files. For more information see {{{../hadoop-common/FileSystemShell.html}
|
||||
File System Shell Guide}}.
|
||||
|
||||
** DFSAdmin Command
|
||||
|
||||
@ -169,7 +172,7 @@ HDFS Users Guide
|
||||
of racks and datanodes attached to the tracks as viewed by the
|
||||
NameNode.
|
||||
|
||||
For command usage, see {{{dfsadmin}}}.
|
||||
For command usage, see {{{../hadoop-common/CommandsManual.html#dfsadmin}dfsadmin}}.
|
||||
|
||||
* Secondary NameNode
|
||||
|
||||
@ -203,7 +206,8 @@ HDFS Users Guide
|
||||
So that the check pointed image is always ready to be read by the
|
||||
primary NameNode if necessary.
|
||||
|
||||
For command usage, see {{{secondarynamenode}}}.
|
||||
For command usage,
|
||||
see {{{../hadoop-common/CommandsManual.html#secondarynamenode}secondarynamenode}}.
|
||||
|
||||
* Checkpoint Node
|
||||
|
||||
@ -245,7 +249,7 @@ HDFS Users Guide
|
||||
Multiple checkpoint nodes may be specified in the cluster configuration
|
||||
file.
|
||||
|
||||
For command usage, see {{{namenode}}}.
|
||||
For command usage, see {{{../hadoop-common/CommandsManual.html#namenode}namenode}}.
|
||||
|
||||
* Backup Node
|
||||
|
||||
@ -287,7 +291,7 @@ HDFS Users Guide
|
||||
|
||||
For a complete discussion of the motivation behind the creation of the
|
||||
Backup node and Checkpoint node, see {{{https://issues.apache.org/jira/browse/HADOOP-4539}HADOOP-4539}}.
|
||||
For command usage, see {{{namenode}}}.
|
||||
For command usage, see {{{../hadoop-common/CommandsManual.html#namenode}namenode}}.
|
||||
|
||||
* Import Checkpoint
|
||||
|
||||
@ -310,7 +314,7 @@ HDFS Users Guide
|
||||
verifies that the image in <<<dfs.namenode.checkpoint.dir>>> is consistent,
|
||||
but does not modify it in any way.
|
||||
|
||||
For command usage, see {{{namenode}}}.
|
||||
For command usage, see {{{../hadoop-common/CommandsManual.html#namenode}namenode}}.
|
||||
|
||||
* Rebalancer
|
||||
|
||||
@ -337,7 +341,7 @@ HDFS Users Guide
|
||||
A brief administrator's guide for rebalancer as a PDF is attached to
|
||||
{{{https://issues.apache.org/jira/browse/HADOOP-1652}HADOOP-1652}}.
|
||||
|
||||
For command usage, see {{{balancer}}}.
|
||||
For command usage, see {{{../hadoop-common/CommandsManual.html#balancer}balancer}}.
|
||||
|
||||
* Rack Awareness
|
||||
|
||||
@ -379,8 +383,9 @@ HDFS Users Guide
|
||||
most of the recoverable failures. By default fsck ignores open files
|
||||
but provides an option to select all files during reporting. The HDFS
|
||||
fsck command is not a Hadoop shell command. It can be run as
|
||||
<<<bin/hadoop fsck>>>. For command usage, see {{{fsck}}}. fsck can be run on the
|
||||
whole file system or on a subset of files.
|
||||
<<<bin/hadoop fsck>>>. For command usage, see
|
||||
{{{../hadoop-common/CommandsManual.html#fsck}fsck}}. fsck can be run on
|
||||
the whole file system or on a subset of files.
|
||||
|
||||
* fetchdt
|
||||
|
||||
@ -393,7 +398,8 @@ HDFS Users Guide
|
||||
command. It can be run as <<<bin/hadoop fetchdt DTfile>>>. After you got
|
||||
the token you can run an HDFS command without having Kerberos tickets,
|
||||
by pointing <<<HADOOP_TOKEN_FILE_LOCATION>>> environmental variable to the
|
||||
delegation token file. For command usage, see {{{fetchdt}}} command.
|
||||
delegation token file. For command usage, see
|
||||
{{{../hadoop-common/CommandsManual.html#fetchdt}fetchdt}} command.
|
||||
|
||||
* Recovery Mode
|
||||
|
||||
@ -427,10 +433,11 @@ HDFS Users Guide
|
||||
let alone to restart HDFS from scratch. HDFS allows administrators to
|
||||
go back to earlier version of Hadoop and rollback the cluster to the
|
||||
state it was in before the upgrade. HDFS upgrade is described in more
|
||||
detail in {{{Hadoop Upgrade}}} Wiki page. HDFS can have one such backup at a
|
||||
time. Before upgrading, administrators need to remove existing backup
|
||||
using bin/hadoop dfsadmin <<<-finalizeUpgrade>>> command. The following
|
||||
briefly describes the typical upgrade procedure:
|
||||
detail in {{{http://wiki.apache.org/hadoop/Hadoop_Upgrade}Hadoop Upgrade}}
|
||||
Wiki page. HDFS can have one such backup at a time. Before upgrading,
|
||||
administrators need to remove existing backup using bin/hadoop dfsadmin
|
||||
<<<-finalizeUpgrade>>> command. The following briefly describes the
|
||||
typical upgrade procedure:
|
||||
|
||||
* Before upgrading Hadoop software, finalize if there is an existing
|
||||
backup. <<<dfsadmin -upgradeProgress>>> status can tell if the cluster
|
||||
@ -450,7 +457,7 @@ HDFS Users Guide
|
||||
|
||||
* stop the cluster and distribute earlier version of Hadoop.
|
||||
|
||||
* start the cluster with rollback option. (<<<bin/start-dfs.h -rollback>>>).
|
||||
* start the cluster with rollback option. (<<<bin/start-dfs.sh -rollback>>>).
|
||||
|
||||
* File Permissions and Security
|
||||
|
||||
@ -465,14 +472,15 @@ HDFS Users Guide
|
||||
* Scalability
|
||||
|
||||
Hadoop currently runs on clusters with thousands of nodes. The
|
||||
{{{PoweredBy}}} Wiki page lists some of the organizations that deploy Hadoop
|
||||
on large clusters. HDFS has one NameNode for each cluster. Currently
|
||||
the total memory available on NameNode is the primary scalability
|
||||
limitation. On very large clusters, increasing average size of files
|
||||
stored in HDFS helps with increasing cluster size without increasing
|
||||
memory requirements on NameNode. The default configuration may not
|
||||
suite very large clustes. The {{{FAQ}}} Wiki page lists suggested
|
||||
configuration improvements for large Hadoop clusters.
|
||||
{{{http://wiki.apache.org/hadoop/PoweredBy}PoweredBy}} Wiki page lists
|
||||
some of the organizations that deploy Hadoop on large clusters.
|
||||
HDFS has one NameNode for each cluster. Currently the total memory
|
||||
available on NameNode is the primary scalability limitation.
|
||||
On very large clusters, increasing average size of files stored in
|
||||
HDFS helps with increasing cluster size without increasing memory
|
||||
requirements on NameNode. The default configuration may not suit
|
||||
very large clusters. The {{{http://wiki.apache.org/hadoop/FAQ}FAQ}}
|
||||
Wiki page lists suggested configuration improvements for large Hadoop clusters.
|
||||
|
||||
* Related Documentation
|
||||
|
||||
@ -481,19 +489,22 @@ HDFS Users Guide
|
||||
documentation about Hadoop and HDFS. The following list is a starting
|
||||
point for further exploration:
|
||||
|
||||
* {{{Hadoop Site}}}: The home page for the Apache Hadoop site.
|
||||
* {{{http://hadoop.apache.org}Hadoop Site}}: The home page for
|
||||
the Apache Hadoop site.
|
||||
|
||||
* {{{Hadoop Wiki}}}: The home page (FrontPage) for the Hadoop Wiki. Unlike
|
||||
* {{{http://wiki.apache.org/hadoop/FrontPage}Hadoop Wiki}}:
|
||||
The home page (FrontPage) for the Hadoop Wiki. Unlike
|
||||
the released documentation, which is part of Hadoop source tree,
|
||||
Hadoop Wiki is regularly edited by Hadoop Community.
|
||||
|
||||
* {{{FAQ}}}: The FAQ Wiki page.
|
||||
* {{{http://wiki.apache.org/hadoop/FAQ}FAQ}}: The FAQ Wiki page.
|
||||
|
||||
* {{{Hadoop JavaDoc API}}}.
|
||||
* {{{../../api/index.html}Hadoop JavaDoc API}}.
|
||||
|
||||
* {{{Hadoop User Mailing List}}}: core-user[at]hadoop.apache.org.
|
||||
* Hadoop User Mailing List: user[at]hadoop.apache.org.
|
||||
|
||||
* Explore {{{src/hdfs/hdfs-default.xml}}}. It includes brief description of
|
||||
most of the configuration variables available.
|
||||
* Explore {{{./hdfs-default.xml}hdfs-default.xml}}. It includes
|
||||
brief description of most of the configuration variables available.
|
||||
|
||||
* {{{Hadoop Commands Guide}}}: Hadoop commands usage.
|
||||
* {{{../hadoop-common/CommandsManual.html}Hadoop Commands Guide}}:
|
||||
Hadoop commands usage.
|
||||
|
@ -18,8 +18,6 @@
|
||||
|
||||
HFTP Guide
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* Introduction
|
||||
|
@ -19,8 +19,6 @@
|
||||
|
||||
HDFS Short-Circuit Local Reads
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* {Background}
|
||||
|
@ -18,8 +18,6 @@
|
||||
|
||||
WebHDFS REST API
|
||||
|
||||
\[ {{{./index.html}Go Back}} \]
|
||||
|
||||
%{toc|section=1|fromDepth=0}
|
||||
|
||||
* {Document Conventions}
|
||||
@ -54,7 +52,7 @@ WebHDFS REST API
|
||||
* {{{Status of a File/Directory}<<<GETFILESTATUS>>>}}
|
||||
(see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.getFileStatus)
|
||||
|
||||
* {{<<<LISTSTATUS>>>}}
|
||||
* {{{List a Directory}<<<LISTSTATUS>>>}}
|
||||
(see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.listStatus)
|
||||
|
||||
* {{{Get Content Summary of a Directory}<<<GETCONTENTSUMMARY>>>}}
|
||||
@ -109,7 +107,7 @@ WebHDFS REST API
|
||||
* {{{Append to a File}<<<APPEND>>>}}
|
||||
(see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.append)
|
||||
|
||||
* {{{Concatenate Files}<<<CONCAT>>>}}
|
||||
* {{{Concat File(s)}<<<CONCAT>>>}}
|
||||
(see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.concat)
|
||||
|
||||
* HTTP DELETE
|
||||
@ -871,7 +869,7 @@ Content-Length: 0
|
||||
* {Error Responses}
|
||||
|
||||
When an operation fails, the server may throw an exception.
|
||||
The JSON schema of error responses is defined in {{<<<RemoteException>>> JSON schema}}.
|
||||
The JSON schema of error responses is defined in {{{RemoteException JSON Schema}}}.
|
||||
The table below shows the mapping from exceptions to HTTP response codes.
|
||||
|
||||
** {HTTP Response Codes}
|
||||
@ -1119,7 +1117,7 @@ Transfer-Encoding: chunked
|
||||
See also:
|
||||
{{{FileStatus Properties}<<<FileStatus>>> Properties}},
|
||||
{{{Status of a File/Directory}<<<GETFILESTATUS>>>}},
|
||||
{{{../../api/org/apache/hadoop/fs/FileStatus}FileStatus}}
|
||||
{{{../../api/org/apache/hadoop/fs/FileStatus.html}FileStatus}}
|
||||
|
||||
|
||||
*** {FileStatus Properties}
|
||||
@ -1232,7 +1230,7 @@ var fileStatusProperties =
|
||||
See also:
|
||||
{{{FileStatus Properties}<<<FileStatus>>> Properties}},
|
||||
{{{List a Directory}<<<LISTSTATUS>>>}},
|
||||
{{{../../api/org/apache/hadoop/fs/FileStatus}FileStatus}}
|
||||
{{{../../api/org/apache/hadoop/fs/FileStatus.html}FileStatus}}
|
||||
|
||||
|
||||
** {Long JSON Schema}
|
||||
@ -1275,7 +1273,7 @@ var fileStatusProperties =
|
||||
|
||||
See also:
|
||||
{{{Get Home Directory}<<<GETHOMEDIRECTORY>>>}},
|
||||
{{{../../api/org/apache/hadoop/fs/Path}Path}}
|
||||
{{{../../api/org/apache/hadoop/fs/Path.html}Path}}
|
||||
|
||||
|
||||
** {RemoteException JSON Schema}
|
||||
|
@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs;
|
||||
|
||||
import com.google.common.base.Charsets;
|
||||
import com.google.common.base.Joiner;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
@ -27,7 +29,6 @@ import org.apache.hadoop.fs.*;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.FileSystem.Statistics;
|
||||
import org.apache.hadoop.fs.Options.Rename;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster.NameNodeInfo;
|
||||
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
|
||||
@ -889,21 +890,7 @@ public class DFSTestUtil {
|
||||
|
||||
/** Copy one file's contents into the other **/
|
||||
public static void copyFile(File src, File dest) throws IOException {
|
||||
InputStream in = null;
|
||||
OutputStream out = null;
|
||||
|
||||
try {
|
||||
in = new FileInputStream(src);
|
||||
out = new FileOutputStream(dest);
|
||||
|
||||
byte [] b = new byte[1024];
|
||||
while( in.read(b) > 0 ) {
|
||||
out.write(b);
|
||||
}
|
||||
} finally {
|
||||
if(in != null) in.close();
|
||||
if(out != null) out.close();
|
||||
}
|
||||
FileUtils.copyFile(src, dest);
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
|
@ -118,6 +118,20 @@ public class TestDFSUtil {
|
||||
assertEquals(0, bs.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test constructing LocatedBlock with null cachedLocs
|
||||
*/
|
||||
@Test
|
||||
public void testLocatedBlockConstructorWithNullCachedLocs() {
|
||||
DatanodeInfo d = DFSTestUtil.getLocalDatanodeInfo();
|
||||
DatanodeInfo[] ds = new DatanodeInfo[1];
|
||||
ds[0] = d;
|
||||
|
||||
ExtendedBlock b1 = new ExtendedBlock("bpid", 1, 1, 1);
|
||||
LocatedBlock l1 = new LocatedBlock(b1, ds, null, null, 0, false, null);
|
||||
final DatanodeInfo[] cachedLocs = l1.getCachedLocations();
|
||||
assertTrue(cachedLocs.length == 0);
|
||||
}
|
||||
|
||||
private Configuration setupAddress(String key) {
|
||||
HdfsConfiguration conf = new HdfsConfiguration();
|
||||
|
@ -59,6 +59,7 @@
|
||||
<item name="CLI Mini Cluster" href="hadoop-project-dist/hadoop-common/CLIMiniCluster.html"/>
|
||||
<item name="Native Libraries" href="hadoop-project-dist/hadoop-common/NativeLibraries.html"/>
|
||||
<item name="Superusers" href="hadoop-project-dist/hadoop-common/Superusers.html"/>
|
||||
<item name="Secure Mode" href="hadoop-project-dist/hadoop-common/SecureMode.html"/>
|
||||
<item name="Service Level Authorization" href="hadoop-project-dist/hadoop-common/ServiceLevelAuth.html"/>
|
||||
<item name="HTTP Authentication" href="hadoop-project-dist/hadoop-common/HttpAuthentication.html"/>
|
||||
</menu>
|
||||
|
Loading…
x
Reference in New Issue
Block a user