diff --git a/hadoop-common-project/hadoop-auth/src/site/apt/Configuration.apt.vm b/hadoop-common-project/hadoop-auth/src/site/apt/Configuration.apt.vm index f2fe11d8f6..61bd857515 100644 --- a/hadoop-common-project/hadoop-auth/src/site/apt/Configuration.apt.vm +++ b/hadoop-common-project/hadoop-auth/src/site/apt/Configuration.apt.vm @@ -24,8 +24,7 @@ Configuration * Server Side Configuration Setup - The {{{./apidocs/org/apache/hadoop/auth/server/AuthenticationFilter.html} - AuthenticationFilter filter}} is Hadoop Auth's server side component. + The AuthenticationFilter filter is Hadoop Auth's server side component. This filter must be configured in front of all the web application resources that required authenticated requests. For example: @@ -46,9 +45,7 @@ Configuration must start with the prefix. The default value is no prefix. * <<<[PREFIX.]type>>>: the authentication type keyword (<<<simple>>> or - <<<kerberos>>>) or a - {{{./apidocs/org/apache/hadoop/auth/server/AuthenticationHandler.html} - Authentication handler implementation}}. + <<<kerberos>>>) or a Authentication handler implementation. * <<<[PREFIX.]signature.secret>>>: The secret to SHA-sign the generated authentication tokens. 
If a secret is not provided a random secret is diff --git a/hadoop-common-project/hadoop-auth/src/site/apt/index.apt.vm b/hadoop-common-project/hadoop-auth/src/site/apt/index.apt.vm index 26fc2492ca..6051f8cbf2 100644 --- a/hadoop-common-project/hadoop-auth/src/site/apt/index.apt.vm +++ b/hadoop-common-project/hadoop-auth/src/site/apt/index.apt.vm @@ -52,7 +52,3 @@ Hadoop Auth, Java HTTP SPNEGO ${project.version} * {{{./BuildingIt.html}Building It}} - * {{{./apidocs/index.html}JavaDocs}} - - * {{{./dependencies.html}Dependencies}} - diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 7595563d4e..0a89e7d10c 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -536,6 +536,15 @@ Release 2.4.0 - UNRELEASED HADOOP-10252. HttpServer can't start if hostname is not specified. (Jimmy Xiang via atm) + HADOOP-10203. Connection leak in + Jets3tNativeFileSystemStore#retrieveMetadata. (Andrei Savu via atm) + + HADOOP-10250. VersionUtil returns wrong value when comparing two versions. + (Yongjun Zhang via atm) + + HADOOP-10288. Explicit reference to Log4JLogger breaks non-log4j users + (todd) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES @@ -559,6 +568,12 @@ Release 2.3.0 - UNRELEASED HADOOP-10248. Property name should be included in the exception where property value is null (Akira AJISAKA via umamahesh) + HADOOP-10086. User document for authentication in secure cluster. + (Masatake Iwasaki via Arpit Agarwal) + + HADOOP-10274 Lower the logging level from ERROR to WARN for UGI.doAs method + (Takeshi Miao via stack) + OPTIMIZATIONS HADOOP-10142. Avoid groups lookup for unprivileged users such as "dr.who" @@ -637,6 +652,12 @@ Release 2.3.0 - UNRELEASED HADOOP-10167. Mark hadoop-common source as UTF-8 in Maven pom files / refactoring (Mikhail Antonov via cos) + HADOOP-9982. Fix dead links in hadoop site docs. 
(Akira Ajisaka via Arpit + Agarwal) + + HADOOP-10212. Incorrect compile command in Native Library document. + (Akira Ajisaka via Arpit Agarwal) + Release 2.2.0 - 2013-10-13 INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml b/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml index bf4da979ae..3e5661ea0d 100644 --- a/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml +++ b/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml @@ -364,4 +364,11 @@ + + + + + + + diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java index e05ed09f58..4926618705 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java @@ -110,23 +110,29 @@ class Jets3tNativeFileSystemStore implements NativeFileSystemStore { handleS3ServiceException(e); } } - + @Override public FileMetadata retrieveMetadata(String key) throws IOException { + StorageObject object = null; try { if(LOG.isDebugEnabled()) { LOG.debug("Getting metadata for key: " + key + " from bucket:" + bucket.getName()); } - S3Object object = s3Service.getObject(bucket.getName(), key); + object = s3Service.getObjectDetails(bucket.getName(), key); return new FileMetadata(key, object.getContentLength(), object.getLastModifiedDate().getTime()); - } catch (S3ServiceException e) { + + } catch (ServiceException e) { // Following is brittle. Is there a better way? 
- if (e.getS3ErrorCode().matches("NoSuchKey")) { + if ("NoSuchKey".equals(e.getErrorCode())) { return null; //return null if key not found } - handleS3ServiceException(e); + handleServiceException(e); return null; //never returned - keep compiler happy + } finally { + if (object != null) { + object.closeDataInputStream(); + } } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpRequestLog.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpRequestLog.java index eb8968bcc4..52d985042a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpRequestLog.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpRequestLog.java @@ -53,7 +53,17 @@ public class HttpRequestLog { String appenderName = name + "requestlog"; Log logger = LogFactory.getLog(loggerName); - if (logger instanceof Log4JLogger) { + boolean isLog4JLogger;; + try { + isLog4JLogger = logger instanceof Log4JLogger; + } catch (NoClassDefFoundError err) { + // In some dependent projects, log4j may not even be on the classpath at + // runtime, in which case the above instanceof check will throw + // NoClassDefFoundError. 
+ LOG.debug("Could not load Log4JLogger class", err); + isLog4JLogger = false; + } + if (isLog4JLogger) { Log4JLogger httpLog4JLog = (Log4JLogger)logger; Logger httpLogger = httpLog4JLog.getLogger(); Appender appender = null; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java index dde5dcd3b0..178a472350 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java @@ -1560,7 +1560,7 @@ public class UserGroupInformation { return Subject.doAs(subject, action); } catch (PrivilegedActionException pae) { Throwable cause = pae.getCause(); - LOG.error("PriviledgedActionException as:"+this+" cause:"+cause); + LOG.warn("PriviledgedActionException as:"+this+" cause:"+cause); if (cause instanceof IOException) { throw (IOException) cause; } else if (cause instanceof Error) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ComparableVersion.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ComparableVersion.java new file mode 100644 index 0000000000..a57342fa88 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ComparableVersion.java @@ -0,0 +1,479 @@ +// Code source of this file: +// http://grepcode.com/file/repo1.maven.org/maven2/ +// org.apache.maven/maven-artifact/3.1.1/ +// org/apache/maven/artifact/versioning/ComparableVersion.java/ +// +// Modifications made on top of the source: +// 1. Changed +// package org.apache.maven.artifact.versioning; +// to +// package org.apache.hadoop.util; +// 2. 
Removed author tags to clear hadoop author tag warning +// author Kenney Westerhof +// author Hervé Boutemy +// +package org.apache.hadoop.util; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.Locale; +import java.util.Properties; +import java.util.Stack; + +/** + * Generic implementation of version comparison. + * + *

Features: + *

    + *
  • mixing of '-' (dash) and '.' (dot) separators,
  • + *
  • transition between characters and digits also constitutes a separator: + * 1.0alpha1 => [1, 0, alpha, 1]
  • + *
  • unlimited number of version components,
  • + *
  • version components in the text can be digits or strings,
  • + *
  • strings are checked for well-known qualifiers and the qualifier ordering is used for version ordering. + * Well-known qualifiers (case insensitive) are:
      + *
    • alpha or a
    • + *
    • beta or b
    • + *
    • milestone or m
    • + *
    • rc or cr
    • + *
    • snapshot
    • + *
    • (the empty string) or ga or final
    • + *
    • sp
    • + *
    + * Unknown qualifiers are considered after known qualifiers, with lexical order (always case insensitive), + *
  • + *
  • a dash usually precedes a qualifier, and is always less important than something preceded with a dot.
  • + *

+ * + * @see "Versioning" on Maven Wiki + */ +public class ComparableVersion + implements Comparable +{ + private String value; + + private String canonical; + + private ListItem items; + + private interface Item + { + int INTEGER_ITEM = 0; + int STRING_ITEM = 1; + int LIST_ITEM = 2; + + int compareTo( Item item ); + + int getType(); + + boolean isNull(); + } + + /** + * Represents a numeric item in the version item list. + */ + private static class IntegerItem + implements Item + { + private static final BigInteger BIG_INTEGER_ZERO = new BigInteger( "0" ); + + private final BigInteger value; + + public static final IntegerItem ZERO = new IntegerItem(); + + private IntegerItem() + { + this.value = BIG_INTEGER_ZERO; + } + + public IntegerItem( String str ) + { + this.value = new BigInteger( str ); + } + + public int getType() + { + return INTEGER_ITEM; + } + + public boolean isNull() + { + return BIG_INTEGER_ZERO.equals( value ); + } + + public int compareTo( Item item ) + { + if ( item == null ) + { + return BIG_INTEGER_ZERO.equals( value ) ? 0 : 1; // 1.0 == 1, 1.1 > 1 + } + + switch ( item.getType() ) + { + case INTEGER_ITEM: + return value.compareTo( ( (IntegerItem) item ).value ); + + case STRING_ITEM: + return 1; // 1.1 > 1-sp + + case LIST_ITEM: + return 1; // 1.1 > 1-1 + + default: + throw new RuntimeException( "invalid item: " + item.getClass() ); + } + } + + public String toString() + { + return value.toString(); + } + } + + /** + * Represents a string in the version item list, usually a qualifier. 
+ */ + private static class StringItem + implements Item + { + private static final String[] QUALIFIERS = { "alpha", "beta", "milestone", "rc", "snapshot", "", "sp" }; + + private static final List _QUALIFIERS = Arrays.asList( QUALIFIERS ); + + private static final Properties ALIASES = new Properties(); + static + { + ALIASES.put( "ga", "" ); + ALIASES.put( "final", "" ); + ALIASES.put( "cr", "rc" ); + } + + /** + * A comparable value for the empty-string qualifier. This one is used to determine if a given qualifier makes + * the version older than one without a qualifier, or more recent. + */ + private static final String RELEASE_VERSION_INDEX = String.valueOf( _QUALIFIERS.indexOf( "" ) ); + + private String value; + + public StringItem( String value, boolean followedByDigit ) + { + if ( followedByDigit && value.length() == 1 ) + { + // a1 = alpha-1, b1 = beta-1, m1 = milestone-1 + switch ( value.charAt( 0 ) ) + { + case 'a': + value = "alpha"; + break; + case 'b': + value = "beta"; + break; + case 'm': + value = "milestone"; + break; + } + } + this.value = ALIASES.getProperty( value , value ); + } + + public int getType() + { + return STRING_ITEM; + } + + public boolean isNull() + { + return ( comparableQualifier( value ).compareTo( RELEASE_VERSION_INDEX ) == 0 ); + } + + /** + * Returns a comparable value for a qualifier. + * + * This method takes into account the ordering of known qualifiers then unknown qualifiers with lexical ordering. + * + * just returning an Integer with the index here is faster, but requires a lot of if/then/else to check for -1 + * or QUALIFIERS.size and then resort to lexical ordering. Most comparisons are decided by the first character, + * so this is still fast. If more characters are needed then it requires a lexical sort anyway. 
+ * + * @param qualifier + * @return an equivalent value that can be used with lexical comparison + */ + public static String comparableQualifier( String qualifier ) + { + int i = _QUALIFIERS.indexOf( qualifier ); + + return i == -1 ? ( _QUALIFIERS.size() + "-" + qualifier ) : String.valueOf( i ); + } + + public int compareTo( Item item ) + { + if ( item == null ) + { + // 1-rc < 1, 1-ga > 1 + return comparableQualifier( value ).compareTo( RELEASE_VERSION_INDEX ); + } + switch ( item.getType() ) + { + case INTEGER_ITEM: + return -1; // 1.any < 1.1 ? + + case STRING_ITEM: + return comparableQualifier( value ).compareTo( comparableQualifier( ( (StringItem) item ).value ) ); + + case LIST_ITEM: + return -1; // 1.any < 1-1 + + default: + throw new RuntimeException( "invalid item: " + item.getClass() ); + } + } + + public String toString() + { + return value; + } + } + + /** + * Represents a version list item. This class is used both for the global item list and for sub-lists (which start + * with '-(number)' in the version specification). 
+ */ + private static class ListItem + extends ArrayList + implements Item + { + public int getType() + { + return LIST_ITEM; + } + + public boolean isNull() + { + return ( size() == 0 ); + } + + void normalize() + { + for ( ListIterator iterator = listIterator( size() ); iterator.hasPrevious(); ) + { + Item item = iterator.previous(); + if ( item.isNull() ) + { + iterator.remove(); // remove null trailing items: 0, "", empty list + } + else + { + break; + } + } + } + + public int compareTo( Item item ) + { + if ( item == null ) + { + if ( size() == 0 ) + { + return 0; // 1-0 = 1- (normalize) = 1 + } + Item first = get( 0 ); + return first.compareTo( null ); + } + switch ( item.getType() ) + { + case INTEGER_ITEM: + return -1; // 1-1 < 1.0.x + + case STRING_ITEM: + return 1; // 1-1 > 1-sp + + case LIST_ITEM: + Iterator left = iterator(); + Iterator right = ( (ListItem) item ).iterator(); + + while ( left.hasNext() || right.hasNext() ) + { + Item l = left.hasNext() ? left.next() : null; + Item r = right.hasNext() ? right.next() : null; + + // if this is shorter, then invert the compare and mul with -1 + int result = l == null ? 
-1 * r.compareTo( l ) : l.compareTo( r ); + + if ( result != 0 ) + { + return result; + } + } + + return 0; + + default: + throw new RuntimeException( "invalid item: " + item.getClass() ); + } + } + + public String toString() + { + StringBuilder buffer = new StringBuilder( "(" ); + for ( Iterator iter = iterator(); iter.hasNext(); ) + { + buffer.append( iter.next() ); + if ( iter.hasNext() ) + { + buffer.append( ',' ); + } + } + buffer.append( ')' ); + return buffer.toString(); + } + } + + public ComparableVersion( String version ) + { + parseVersion( version ); + } + + public final void parseVersion( String version ) + { + this.value = version; + + items = new ListItem(); + + version = version.toLowerCase( Locale.ENGLISH ); + + ListItem list = items; + + Stack stack = new Stack(); + stack.push( list ); + + boolean isDigit = false; + + int startIndex = 0; + + for ( int i = 0; i < version.length(); i++ ) + { + char c = version.charAt( i ); + + if ( c == '.' ) + { + if ( i == startIndex ) + { + list.add( IntegerItem.ZERO ); + } + else + { + list.add( parseItem( isDigit, version.substring( startIndex, i ) ) ); + } + startIndex = i + 1; + } + else if ( c == '-' ) + { + if ( i == startIndex ) + { + list.add( IntegerItem.ZERO ); + } + else + { + list.add( parseItem( isDigit, version.substring( startIndex, i ) ) ); + } + startIndex = i + 1; + + if ( isDigit ) + { + list.normalize(); // 1.0-* = 1-* + + if ( ( i + 1 < version.length() ) && Character.isDigit( version.charAt( i + 1 ) ) ) + { + // new ListItem only if previous were digits and new char is a digit, + // ie need to differentiate only 1.1 from 1-1 + list.add( list = new ListItem() ); + + stack.push( list ); + } + } + } + else if ( Character.isDigit( c ) ) + { + if ( !isDigit && i > startIndex ) + { + list.add( new StringItem( version.substring( startIndex, i ), true ) ); + startIndex = i; + } + + isDigit = true; + } + else + { + if ( isDigit && i > startIndex ) + { + list.add( parseItem( true, version.substring( 
startIndex, i ) ) ); + startIndex = i; + } + + isDigit = false; + } + } + + if ( version.length() > startIndex ) + { + list.add( parseItem( isDigit, version.substring( startIndex ) ) ); + } + + while ( !stack.isEmpty() ) + { + list = (ListItem) stack.pop(); + list.normalize(); + } + + canonical = items.toString(); + } + + private static Item parseItem( boolean isDigit, String buf ) + { + return isDigit ? new IntegerItem( buf ) : new StringItem( buf, false ); + } + + public int compareTo( ComparableVersion o ) + { + return items.compareTo( o.items ); + } + + public String toString() + { + return value; + } + + public boolean equals( Object o ) + { + return ( o instanceof ComparableVersion ) && canonical.equals( ( (ComparableVersion) o ).canonical ); + } + + public int hashCode() + { + return canonical.hashCode(); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionUtil.java index 09a272317f..3e14fa91f6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionUtil.java @@ -17,55 +17,17 @@ */ package org.apache.hadoop.util; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - import org.apache.hadoop.classification.InterfaceAudience; -import com.google.common.collect.ComparisonChain; - +/** + * A wrapper class to maven's ComparableVersion class, to comply + * with maven's version name string convention + */ @InterfaceAudience.Private public abstract class VersionUtil { - - private static final Pattern COMPONENT_GROUPS = Pattern.compile("(\\d+)|(\\D+)"); - /** - * Suffix added by maven for nightly builds and other snapshot releases. - * These releases are considered to precede the non-SNAPSHOT version - * with the same version number. 
- */ - private static final String SNAPSHOT_SUFFIX = "-SNAPSHOT"; - - /** - * This function splits the two versions on "." and performs a - * naturally-ordered comparison of the resulting components. For example, the - * version string "0.3" is considered to precede "0.20", despite the fact that - * lexical comparison would consider "0.20" to precede "0.3". This method of - * comparison is similar to the method used by package versioning systems like - * deb and RPM. - * - * Version components are compared numerically whenever possible, however a - * version component can contain non-numeric characters. When a non-numeric - * group of characters is found in a version component, this group is compared - * with the similarly-indexed group in the other version component. If the - * other group is numeric, then the numeric group is considered to precede the - * non-numeric group. If both groups are non-numeric, then a lexical - * comparison is performed. - * - * If two versions have a different number of components, then only the lower - * number of components are compared. If those components are identical - * between the two versions, then the version with fewer components is - * considered to precede the version with more components. - * - * In addition to the above rules, there is one special case: maven SNAPSHOT - * releases are considered to precede a non-SNAPSHOT release with an - * otherwise identical version number. For example, 2.0-SNAPSHOT precedes - * 2.0. - * - * This function returns a negative integer if version1 precedes version2, a - * positive integer if version2 precedes version1, and 0 if and only if the - * two versions' components are identical in value and cardinality. - * + * Compares two version name strings using maven's ComparableVersion class. + * * @param version1 * the first version to compare * @param version2 @@ -75,58 +37,8 @@ public abstract class VersionUtil { * versions are equal. 
*/ public static int compareVersions(String version1, String version2) { - boolean isSnapshot1 = version1.endsWith(SNAPSHOT_SUFFIX); - boolean isSnapshot2 = version2.endsWith(SNAPSHOT_SUFFIX); - version1 = stripSnapshotSuffix(version1); - version2 = stripSnapshotSuffix(version2); - - String[] version1Parts = version1.split("\\."); - String[] version2Parts = version2.split("\\."); - - for (int i = 0; i < version1Parts.length && i < version2Parts.length; i++) { - String component1 = version1Parts[i]; - String component2 = version2Parts[i]; - if (!component1.equals(component2)) { - Matcher matcher1 = COMPONENT_GROUPS.matcher(component1); - Matcher matcher2 = COMPONENT_GROUPS.matcher(component2); - - while (matcher1.find() && matcher2.find()) { - String group1 = matcher1.group(); - String group2 = matcher2.group(); - if (!group1.equals(group2)) { - if (isNumeric(group1) && isNumeric(group2)) { - return Integer.parseInt(group1) - Integer.parseInt(group2); - } else if (!isNumeric(group1) && !isNumeric(group2)) { - return group1.compareTo(group2); - } else { - return isNumeric(group1) ? 
-1 : 1; - } - } - } - return component1.length() - component2.length(); - } - } - - return ComparisonChain.start() - .compare(version1Parts.length, version2Parts.length) - .compare(isSnapshot2, isSnapshot1) - .result(); - } - - private static String stripSnapshotSuffix(String version) { - if (version.endsWith(SNAPSHOT_SUFFIX)) { - return version.substring(0, version.length() - SNAPSHOT_SUFFIX.length()); - } else { - return version; - } - } - - private static boolean isNumeric(String s) { - try { - Integer.parseInt(s); - return true; - } catch (NumberFormatException nfe) { - return false; - } + ComparableVersion v1 = new ComparableVersion(version1); + ComparableVersion v2 = new ComparableVersion(version2); + return v1.compareTo(v2); } } diff --git a/hadoop-common-project/hadoop-common/src/site/apt/CLIMiniCluster.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/CLIMiniCluster.apt.vm index 51a5a9afac..2d12c3905c 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/CLIMiniCluster.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/CLIMiniCluster.apt.vm @@ -18,8 +18,6 @@ Hadoop MapReduce Next Generation - CLI MiniCluster. - \[ {{{./index.html}Go Back}} \] - %{toc|section=1|fromDepth=0} * {Purpose} @@ -42,7 +40,8 @@ Hadoop MapReduce Next Generation - CLI MiniCluster. $ mvn clean install -DskipTests $ mvn package -Pdist -Dtar -DskipTests -Dmaven.javadoc.skip +---+ - <> You will need protoc 2.5.0 installed. + <> You will need {{{http://code.google.com/p/protobuf/}protoc 2.5.0}} + installed. The tarball should be available in <<>> directory. 
diff --git a/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm index 4857cc797a..b4fef5d7a6 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm @@ -16,8 +16,6 @@ --- ${maven.build.timestamp} - \[ {{{../index.html}Go Back}} \] - %{toc|section=1|fromDepth=0} Hadoop MapReduce Next Generation - Cluster Setup @@ -29,7 +27,7 @@ Hadoop MapReduce Next Generation - Cluster Setup with thousands of nodes. To play with Hadoop, you may first want to install it on a single - machine (see {{{SingleCluster}Single Node Setup}}). + machine (see {{{./SingleCluster.html}Single Node Setup}}). * {Prerequisites} @@ -571,440 +569,6 @@ $ $HADOOP_YARN_HOME/sbin/yarn-daemon.sh stop proxyserver --config $HADOOP_CONF_D $ $HADOOP_PREFIX/sbin/mr-jobhistory-daemon.sh stop historyserver --config $HADOOP_CONF_DIR ---- -* {Running Hadoop in Secure Mode} - - This section deals with important parameters to be specified in - to run Hadoop in <> with strong, Kerberos-based - authentication. - - * <<>> - - Ensure that HDFS and YARN daemons run as different Unix users, for e.g. - <<>> and <<>>. Also, ensure that the MapReduce JobHistory - server runs as user <<>>. - - It's recommended to have them share a Unix group, for e.g. <<>>. 
- -*---------------+----------------------------------------------------------------------+ -|| User:Group || Daemons | -*---------------+----------------------------------------------------------------------+ -| hdfs:hadoop | NameNode, Secondary NameNode, Checkpoint Node, Backup Node, DataNode | -*---------------+----------------------------------------------------------------------+ -| yarn:hadoop | ResourceManager, NodeManager | -*---------------+----------------------------------------------------------------------+ -| mapred:hadoop | MapReduce JobHistory Server | -*---------------+----------------------------------------------------------------------+ - - * <<>> - - The following table lists various paths on HDFS and local filesystems (on - all nodes) and recommended permissions: - -*-------------------+-------------------+------------------+------------------+ -|| Filesystem || Path || User:Group || Permissions | -*-------------------+-------------------+------------------+------------------+ -| local | <<>> | hdfs:hadoop | drwx------ | -*-------------------+-------------------+------------------+------------------+ -| local | <<>> | hdfs:hadoop | drwx------ | -*-------------------+-------------------+------------------+------------------+ -| local | $HADOOP_LOG_DIR | hdfs:hadoop | drwxrwxr-x | -*-------------------+-------------------+------------------+------------------+ -| local | $YARN_LOG_DIR | yarn:hadoop | drwxrwxr-x | -*-------------------+-------------------+------------------+------------------+ -| local | <<>> | yarn:hadoop | drwxr-xr-x | -*-------------------+-------------------+------------------+------------------+ -| local | <<>> | yarn:hadoop | drwxr-xr-x | -*-------------------+-------------------+------------------+------------------+ -| local | container-executor | root:hadoop | --Sr-s--- | -*-------------------+-------------------+------------------+------------------+ -| local | <<>> | root:hadoop | r-------- | 
-*-------------------+-------------------+------------------+------------------+ -| hdfs | / | hdfs:hadoop | drwxr-xr-x | -*-------------------+-------------------+------------------+------------------+ -| hdfs | /tmp | hdfs:hadoop | drwxrwxrwxt | -*-------------------+-------------------+------------------+------------------+ -| hdfs | /user | hdfs:hadoop | drwxr-xr-x | -*-------------------+-------------------+------------------+------------------+ -| hdfs | <<>> | yarn:hadoop | drwxrwxrwxt | -*-------------------+-------------------+------------------+------------------+ -| hdfs | <<>> | mapred:hadoop | | -| | | | drwxrwxrwxt | -*-------------------+-------------------+------------------+------------------+ -| hdfs | <<>> | mapred:hadoop | | -| | | | drwxr-x--- | -*-------------------+-------------------+------------------+------------------+ - - * Kerberos Keytab files - - * HDFS - - The NameNode keytab file, on the NameNode host, should look like the - following: - ----- -$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/nn.service.keytab -Keytab name: FILE:/etc/security/keytab/nn.service.keytab -KVNO Timestamp Principal - 4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) ----- - - The Secondary NameNode keytab file, on that host, should look like the - following: - ----- -$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/sn.service.keytab -Keytab name: FILE:/etc/security/keytab/sn.service.keytab -KVNO Timestamp 
Principal - 4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) ----- - - The DataNode keytab file, on each host, should look like the following: - ----- -$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/dn.service.keytab -Keytab name: FILE:/etc/security/keytab/dn.service.keytab -KVNO Timestamp Principal - 4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) ----- - - * YARN - - The ResourceManager keytab file, on the ResourceManager host, should look - like the following: - ----- -$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/rm.service.keytab -Keytab name: FILE:/etc/security/keytab/rm.service.keytab -KVNO Timestamp Principal - 4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 
rm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) ----- - - The NodeManager keytab file, on each host, should look like the following: - ----- -$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/nm.service.keytab -Keytab name: FILE:/etc/security/keytab/nm.service.keytab -KVNO Timestamp Principal - 4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) ----- - - * MapReduce JobHistory Server - - The MapReduce JobHistory Server keytab file, on that host, should look - like the following: - ----- -$ /usr/kerberos/bin/klist -e -k -t /etc/security/keytab/jhs.service.keytab -Keytab name: FILE:/etc/security/keytab/jhs.service.keytab -KVNO Timestamp Principal - 4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS 
mode with 96-bit SHA-1 HMAC) - 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) ----- - -** Configuration in Secure Mode - - * <<>> - -*-------------------------+-------------------------+------------------------+ -|| Parameter || Value || Notes | -*-------------------------+-------------------------+------------------------+ -| <<>> | | is non-secure. | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | | Enable RPC service-level authorization. | -*-------------------------+-------------------------+------------------------+ - - * <<>> - - * Configurations for NameNode: - -*-------------------------+-------------------------+------------------------+ -|| Parameter || Value || Notes | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | | Enable HDFS block access tokens for secure operations. | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | | This value is deprecated. Use dfs.http.policy | -*-------------------------+-------------------------+------------------------+ -| <<>> | or or | | -| | | HTTPS_ONLY turns off http access | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -*-------------------------+-------------------------+------------------------+ -| <<>> | <50470> | | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | | Kerberos keytab file for the NameNode. | -*-------------------------+-------------------------+------------------------+ -| <<>> | nn/_HOST@REALM.TLD | | -| | | Kerberos principal name for the NameNode. | -*-------------------------+-------------------------+------------------------+ -| <<>> | host/_HOST@REALM.TLD | | -| | | HTTPS Kerberos principal name for the NameNode. 
| -*-------------------------+-------------------------+------------------------+ - - * Configurations for Secondary NameNode: - -*-------------------------+-------------------------+------------------------+ -|| Parameter || Value || Notes | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -*-------------------------+-------------------------+------------------------+ -| <<>> | <50470> | | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | | | -| | | Kerberos keytab file for the NameNode. | -*-------------------------+-------------------------+------------------------+ -| <<>> | sn/_HOST@REALM.TLD | | -| | | Kerberos principal name for the Secondary NameNode. | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | host/_HOST@REALM.TLD | | -| | | HTTPS Kerberos principal name for the Secondary NameNode. | -*-------------------------+-------------------------+------------------------+ - - * Configurations for DataNode: - -*-------------------------+-------------------------+------------------------+ -|| Parameter || Value || Notes | -*-------------------------+-------------------------+------------------------+ -| <<>> | 700 | | -*-------------------------+-------------------------+------------------------+ -| <<>> | <0.0.0.0:2003> | | -*-------------------------+-------------------------+------------------------+ -| <<>> | <0.0.0.0:2005> | | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | | Kerberos keytab file for the DataNode. | -*-------------------------+-------------------------+------------------------+ -| <<>> | dn/_HOST@REALM.TLD | | -| | | Kerberos principal name for the DataNode. | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | host/_HOST@REALM.TLD | | -| | | HTTPS Kerberos principal name for the DataNode. 
| -*-------------------------+-------------------------+------------------------+ - - * <<>> - - * WebAppProxy - - The <<>> provides a proxy between the web applications - exported by an application and an end user. If security is enabled - it will warn users before accessing a potentially unsafe web application. - Authentication and authorization using the proxy is handled just like - any other privileged web application. - -*-------------------------+-------------------------+------------------------+ -|| Parameter || Value || Notes | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | <<>> host:port for proxy to AM web apps. | | -| | | if this is the same as <<>>| -| | | or it is not defined then the <<>> will run the proxy| -| | | otherwise a standalone proxy server will need to be launched.| -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | | | -| | | Kerberos keytab file for the WebAppProxy. | -*-------------------------+-------------------------+------------------------+ -| <<>> | wap/_HOST@REALM.TLD | | -| | | Kerberos principal name for the WebAppProxy. | -*-------------------------+-------------------------+------------------------+ - - * LinuxContainerExecutor - - A <<>> used by YARN framework which define how any - launched and controlled. - - The following are the available in Hadoop YARN: - -*--------------------------------------+--------------------------------------+ -|| ContainerExecutor || Description | -*--------------------------------------+--------------------------------------+ -| <<>> | | -| | The default executor which YARN uses to manage container execution. | -| | The container process has the same Unix user as the NodeManager. 
| -*--------------------------------------+--------------------------------------+ -| <<>> | | -| | Supported only on GNU/Linux, this executor runs the containers as either the | -| | YARN user who submitted the application (when full security is enabled) or | -| | as a dedicated user (defaults to nobody) when full security is not enabled. | -| | When full security is enabled, this executor requires all user accounts to be | -| | created on the cluster nodes where the containers are launched. It uses | -| | a executable that is included in the Hadoop distribution. | -| | The NodeManager uses this executable to launch and kill containers. | -| | The setuid executable switches to the user who has submitted the | -| | application and launches or kills the containers. For maximum security, | -| | this executor sets up restricted permissions and user/group ownership of | -| | local files and directories used by the containers such as the shared | -| | objects, jars, intermediate files, log files etc. Particularly note that, | -| | because of this, except the application owner and NodeManager, no other | -| | user can access any of the local files/directories including those | -| | localized as part of the distributed cache. | -*--------------------------------------+--------------------------------------+ - - To build the LinuxContainerExecutor executable run: - ----- - $ mvn package -Dcontainer-executor.conf.dir=/etc/hadoop/ ----- - - The path passed in <<<-Dcontainer-executor.conf.dir>>> should be the - path on the cluster nodes where a configuration file for the setuid - executable should be located. The executable should be installed in - $HADOOP_YARN_HOME/bin. - - The executable must have specific permissions: 6050 or --Sr-s--- - permissions user-owned by (super-user) and group-owned by a - special group (e.g. <<>>) of which the NodeManager Unix user is - the group member and no ordinary application user is. 
If any application - user belongs to this special group, security will be compromised. This - special group name should be specified for the configuration property - <<>> in both - <<>> and <<>>. - - For example, let's say that the NodeManager is run as user who is - part of the groups users and , any of them being the primary group. - Let also be that has both and another user - (application submitter) as its members, and does not - belong to . Going by the above description, the setuid/setgid - executable should be set 6050 or --Sr-s--- with user-owner as and - group-owner as which has as its member (and not - which has also as its member besides ). - - The LinuxTaskController requires that paths including and leading up to - the directories specified in <<>> and - <<>> to be set 755 permissions as described - above in the table on permissions on directories. - - * <<>> - - The executable requires a configuration file called - <<>> to be present in the configuration - directory passed to the mvn target mentioned above. - - The configuration file must be owned by the user running NodeManager - (user <<>> in the above example), group-owned by anyone and - should have the permissions 0400 or r--------. - - The executable requires following configuration items to be present - in the <<>> file. The items should be - mentioned as simple key=value pairs, one per-line: - -*-------------------------+-------------------------+------------------------+ -|| Parameter || Value || Notes | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | | Unix group of the NodeManager. The group owner of the | -| | | binary should be this group. Should be same as the | -| | | value with which the NodeManager is configured. This configuration is | -| | | required for validating the secure access of the | -| | | binary. | -*-------------------------+-------------------------+------------------------+ -| <<>> | hfds,yarn,mapred,bin | Banned users. 
| -*-------------------------+-------------------------+------------------------+ -| <<>> | foo,bar | Allowed system users. | -*-------------------------+-------------------------+------------------------+ -| <<>> | 1000 | Prevent other super-users. | -*-------------------------+-------------------------+------------------------+ - - To re-cap, here are the local file-sysytem permissions required for the - various paths related to the <<>>: - -*-------------------+-------------------+------------------+------------------+ -|| Filesystem || Path || User:Group || Permissions | -*-------------------+-------------------+------------------+------------------+ -| local | container-executor | root:hadoop | --Sr-s--- | -*-------------------+-------------------+------------------+------------------+ -| local | <<>> | root:hadoop | r-------- | -*-------------------+-------------------+------------------+------------------+ -| local | <<>> | yarn:hadoop | drwxr-xr-x | -*-------------------+-------------------+------------------+------------------+ -| local | <<>> | yarn:hadoop | drwxr-xr-x | -*-------------------+-------------------+------------------+------------------+ - - * Configurations for ResourceManager: - -*-------------------------+-------------------------+------------------------+ -|| Parameter || Value || Notes | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | | | -| | | Kerberos keytab file for the ResourceManager. | -*-------------------------+-------------------------+------------------------+ -| <<>> | rm/_HOST@REALM.TLD | | -| | | Kerberos principal name for the ResourceManager. 
| -*-------------------------+-------------------------+------------------------+ - - * Configurations for NodeManager: - -*-------------------------+-------------------------+------------------------+ -|| Parameter || Value || Notes | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | | Kerberos keytab file for the NodeManager. | -*-------------------------+-------------------------+------------------------+ -| <<>> | nm/_HOST@REALM.TLD | | -| | | Kerberos principal name for the NodeManager. | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | <<>> | -| | | Use LinuxContainerExecutor. | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | | Unix group of the NodeManager. | -*-------------------------+-------------------------+------------------------+ - - * <<>> - - * Configurations for MapReduce JobHistory Server: - -*-------------------------+-------------------------+------------------------+ -|| Parameter || Value || Notes | -*-------------------------+-------------------------+------------------------+ -| <<>> | | | -| | MapReduce JobHistory Server | Default port is 10020. | -*-------------------------+-------------------------+------------------------+ -| <<>> | | -| | | | -| | | Kerberos keytab file for the MapReduce JobHistory Server. | -*-------------------------+-------------------------+------------------------+ -| <<>> | jhs/_HOST@REALM.TLD | | -| | | Kerberos principal name for the MapReduce JobHistory Server. 
| -*-------------------------+-------------------------+------------------------+ - * {Operating the Hadoop Cluster} diff --git a/hadoop-common-project/hadoop-common/src/site/apt/CommandsManual.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/CommandsManual.apt.vm index 857b7317fa..18114bbc3b 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/CommandsManual.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/CommandsManual.apt.vm @@ -44,8 +44,9 @@ Overview Generic Options The following options are supported by {{dfsadmin}}, {{fs}}, {{fsck}}, - {{job}} and {{fetchdt}}. Applications should implement {{{some_useful_url}Tool}} to support - {{{another_useful_url}GenericOptions}}. + {{job}} and {{fetchdt}}. Applications should implement + {{{../../api/org/apache/hadoop/util/Tool.html}Tool}} to support + GenericOptions. *------------------------------------------------+-----------------------------+ || GENERIC_OPTION || Description @@ -123,7 +124,8 @@ User Commands * <<>> - Runs a HDFS filesystem checking utility. See {{Fsck}} for more info. + Runs a HDFS filesystem checking utility. + See {{{../hadoop-hdfs/HdfsUserGuide.html#fsck}fsck}} for more info. Usage: << [-move | -delete | -openforwrite] [-files [-blocks [-locations | -racks]]]>>> @@ -149,7 +151,8 @@ User Commands * <<>> - Gets Delegation Token from a NameNode. See {{fetchdt}} for more info. + Gets Delegation Token from a NameNode. + See {{{../hadoop-hdfs/HdfsUserGuide.html#fetchdt}fetchdt}} for more info. Usage: <<] >>> @@ -302,7 +305,8 @@ Administration Commands * <<>> Runs a cluster balancing utility. An administrator can simply press Ctrl-C - to stop the rebalancing process. See Rebalancer for more details. + to stop the rebalancing process. See + {{{../hadoop-hdfs/HdfsUserGuide.html#Rebalancer}Rebalancer}} for more details. Usage: <<]>>> @@ -445,7 +449,7 @@ Administration Commands * <<>> Runs the namenode. 
More info about the upgrade, rollback and finalize is - at Upgrade Rollback + at {{{../hadoop-hdfs/HdfsUserGuide.html#Upgrade_and_Rollback}Upgrade Rollback}}. Usage: <<>> @@ -474,8 +478,9 @@ Administration Commands * <<>> - Runs the HDFS secondary namenode. See Secondary Namenode for more - info. + Runs the HDFS secondary namenode. + See {{{../hadoop-hdfs/HdfsUserGuide.html#Secondary_NameNode}Secondary Namenode}} + for more info. Usage: <<>> diff --git a/hadoop-common-project/hadoop-common/src/site/apt/Compatibility.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/Compatibility.apt.vm index 2cd97650a2..e3c4a4daf1 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/Compatibility.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/Compatibility.apt.vm @@ -233,9 +233,10 @@ hand-in-hand to address this. * In particular for MapReduce applications, the developer community will try our best to support provide binary compatibility across major - releases e.g. applications using org.apache.hadop.mapred.* APIs are - supported compatibly across hadoop-1.x and hadoop-2.x. See - {{{../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html} + releases e.g. applications using org.apache.hadoop.mapred. + + * APIs are supported compatibly across hadoop-1.x and hadoop-2.x. See + {{{../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html} Compatibility for MapReduce applications between hadoop-1.x and hadoop-2.x}} for more details. @@ -248,13 +249,13 @@ hand-in-hand to address this. 
* {{{../hadoop-hdfs/WebHDFS.html}WebHDFS}} - Stable - * {{{../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html}ResourceManager}} + * {{{../../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html}ResourceManager}} - * {{{../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html}NodeManager}} + * {{{../../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html}NodeManager}} - * {{{../hadoop-yarn/hadoop-yarn-site/MapredAppMasterRest.html}MR Application Master}} + * {{{../../hadoop-yarn/hadoop-yarn-site/MapredAppMasterRest.html}MR Application Master}} - * {{{../hadoop-yarn/hadoop-yarn-site/HistoryServerRest.html}History Server}} + * {{{../../hadoop-yarn/hadoop-yarn-site/HistoryServerRest.html}History Server}} *** Policy @@ -512,7 +513,8 @@ hand-in-hand to address this. {{{https://issues.apache.org/jira/browse/HADOOP-9517}HADOOP-9517}} * Binary compatibility for MapReduce end-user applications between hadoop-1.x and hadoop-2.x - - {{{../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html}MapReduce Compatibility between hadoop-1.x and hadoop-2.x}} + {{{../../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html} + MapReduce Compatibility between hadoop-1.x and hadoop-2.x}} * Annotations for interfaces as per interface classification schedule - diff --git a/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm index 3e6fd21070..53ef0cabef 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm @@ -88,7 +88,7 @@ chgrp Change group association of files. The user must be the owner of files, or else a super-user. Additional information is in the - {{{betterurl}Permissions Guide}}. + {{{../hadoop-hdfs/HdfsPermissionsGuide.html}Permissions Guide}}. 
Options @@ -101,7 +101,7 @@ chmod Change the permissions of files. With -R, make the change recursively through the directory structure. The user must be the owner of the file, or else a super-user. Additional information is in the - {{{betterurl}Permissions Guide}}. + {{{../hadoop-hdfs/HdfsPermissionsGuide.html}Permissions Guide}}. Options @@ -112,7 +112,7 @@ chown Usage: <<>> Change the owner of files. The user must be a super-user. Additional information - is in the {{{betterurl}Permissions Guide}}. + is in the {{{../hadoop-hdfs/HdfsPermissionsGuide.html}Permissions Guide}}. Options @@ -210,8 +210,8 @@ expunge Usage: <<>> - Empty the Trash. Refer to the {{{betterurl}HDFS Architecture Guide}} for - more information on the Trash feature. + Empty the Trash. Refer to the {{{../hadoop-hdfs/HdfsDesign.html} + HDFS Architecture Guide}} for more information on the Trash feature. get @@ -439,7 +439,9 @@ test Options: * The -e option will check to see if the file exists, returning 0 if true. + * The -z option will check to see if the file is zero length, returning 0 if true. + * The -d option will check to see if the path is directory, returning 0 if true. 
Example: diff --git a/hadoop-common-project/hadoop-common/src/site/apt/InterfaceClassification.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/InterfaceClassification.apt.vm index 811cfe5410..85e66bd9a6 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/InterfaceClassification.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/InterfaceClassification.apt.vm @@ -18,8 +18,6 @@ Hadoop Interface Taxonomy: Audience and Stability Classification - \[ {{{./index.html}Go Back}} \] - %{toc|section=1|fromDepth=0} * Motivation diff --git a/hadoop-common-project/hadoop-common/src/site/apt/NativeLibraries.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/NativeLibraries.apt.vm index df4a1639ce..5b315ee358 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/NativeLibraries.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/NativeLibraries.apt.vm @@ -117,23 +117,19 @@ Native Libraries Guide * zlib-development package (stable version >= 1.2.0) Once you installed the prerequisite packages use the standard hadoop - build.xml file and pass along the compile.native flag (set to true) to - build the native hadoop library: + pom.xml file and pass along the native flag to build the native hadoop + library: ---- - $ ant -Dcompile.native=true + $ mvn package -Pdist,native -DskipTests -Dtar ---- You should see the newly-built library in: ---- - $ build/native//lib + $ hadoop-dist/target/hadoop-${project.version}/lib/native ---- - where is a combination of the system-properties: - ${os.name}-${os.arch}-${sun.arch.data.model} (for example, - Linux-i386-32).
- Please note the following: * It is mandatory to install both the zlib and gzip development diff --git a/hadoop-common-project/hadoop-common/src/site/apt/SecureMode.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/SecureMode.apt.vm new file mode 100644 index 0000000000..9bd55a67ff --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/apt/SecureMode.apt.vm @@ -0,0 +1,637 @@ +~~ Licensed under the Apache License, Version 2.0 (the "License"); +~~ you may not use this file except in compliance with the License. +~~ You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. See accompanying LICENSE file. + + --- + Hadoop in Secure Mode + --- + --- + ${maven.build.timestamp} + +%{toc|section=0|fromDepth=0|toDepth=3} + +Hadoop in Secure Mode + +* Introduction + + This document describes how to configure authentication for Hadoop in + secure mode. + + By default Hadoop runs in non-secure mode in which no actual + authentication is required. + By configuring Hadoop to run in secure mode, + each user and service needs to be authenticated by Kerberos + in order to use Hadoop services. + + Security features of Hadoop consist of + {{{Authentication}authentication}}, + {{{./ServiceLevelAuth.html}service level authorization}}, + {{{./HttpAuthentication.html}authentication for Web consoles}} + and {{{Data confidentiality}data confidentiality}}. + + +* Authentication + +** End User Accounts + + When service level authentication is turned on, + end users using Hadoop in secure mode need to be authenticated by Kerberos. + The simplest way to do authentication is using <<<kinit>>> command of Kerberos.
+ +** User Accounts for Hadoop Daemons + + Ensure that HDFS and YARN daemons run as different Unix users, + e.g. <<<hdfs>>> and <<<yarn>>>. + Also, ensure that the MapReduce JobHistory server runs as + different user such as <<<mapred>>>. + + It's recommended to have them share a Unix group, for e.g. <<<hadoop>>>. + See also "{{Mapping from user to group}}" for group management. + +*---------------+----------------------------------------------------------------------+ +|| User:Group || Daemons | +*---------------+----------------------------------------------------------------------+ +| hdfs:hadoop | NameNode, Secondary NameNode, JournalNode, DataNode | +*---------------+----------------------------------------------------------------------+ +| yarn:hadoop | ResourceManager, NodeManager | +*---------------+----------------------------------------------------------------------+ +| mapred:hadoop | MapReduce JobHistory Server | +*---------------+----------------------------------------------------------------------+ + +** Kerberos principals for Hadoop Daemons and Users + + For running hadoop service daemons in Hadoop in secure mode, + Kerberos principals are required. + Each service reads authentication information saved in keytab file with appropriate permission. + + HTTP web-consoles should be served by principal different from RPC's one. + + Subsections below show the examples of credentials for Hadoop services.
+ +*** HDFS + + The NameNode keytab file, on the NameNode host, should look like the + following: + +---- +$ klist -e -k -t /etc/security/keytab/nn.service.keytab +Keytab name: FILE:/etc/security/keytab/nn.service.keytab +KVNO Timestamp Principal + 4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 nn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) +---- + + The Secondary NameNode keytab file, on that host, should look like the + following: + +---- +$ klist -e -k -t /etc/security/keytab/sn.service.keytab +Keytab name: FILE:/etc/security/keytab/sn.service.keytab +KVNO Timestamp Principal + 4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 sn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) +---- + + The DataNode keytab file, on each host, should look like the following: + +---- +$ klist -e -k -t /etc/security/keytab/dn.service.keytab +Keytab name: FILE:/etc/security/keytab/dn.service.keytab +KVNO Timestamp Principal + 4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 
96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 dn/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) +---- + +*** YARN + + The ResourceManager keytab file, on the ResourceManager host, should look + like the following: + +---- +$ klist -e -k -t /etc/security/keytab/rm.service.keytab +Keytab name: FILE:/etc/security/keytab/rm.service.keytab +KVNO Timestamp Principal + 4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 rm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) +---- + + The NodeManager keytab file, on each host, should look like the following: + +---- +$ klist -e -k -t /etc/security/keytab/nm.service.keytab +Keytab name: FILE:/etc/security/keytab/nm.service.keytab +KVNO Timestamp Principal + 4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 nm/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 
HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) +---- + +*** MapReduce JobHistory Server + + The MapReduce JobHistory Server keytab file, on that host, should look + like the following: + +---- +$ klist -e -k -t /etc/security/keytab/jhs.service.keytab +Keytab name: FILE:/etc/security/keytab/jhs.service.keytab +KVNO Timestamp Principal + 4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 jhs/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-256 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (AES-128 CTS mode with 96-bit SHA-1 HMAC) + 4 07/18/11 21:08:09 host/full.qualified.domain.name@REALM.TLD (ArcFour with HMAC/md5) +---- + +** Mapping from Kerberos principal to OS user account + + Hadoop maps Kerberos principal to OS user account using + the rule specified by <<<hadoop.security.auth_to_local>>> + which works in the same way as the <<<auth_to_local>>> in + {{{http://web.mit.edu/Kerberos/krb5-latest/doc/admin/conf_files/krb5_conf.html}Kerberos configuration file (krb5.conf)}}. + + By default, it picks the first component of principal name as a user name + if the realm matches the <<<default_realm>>> (usually defined in /etc/krb5.conf). + For example, <<<host/full.qualified.domain.name@REALM.TLD>>> is mapped to <<<host>>> + by default rule. + +** Mapping from user to group + + Though files on HDFS are associated to owner and group, + Hadoop does not have the definition of group by itself. + Mapping from user to group is done by OS or LDAP.
+ + You can change the way of mapping by + specifying the name of mapping provider as a value of + <<<hadoop.security.group.mapping>>>. + See {{{../hadoop-hdfs/HdfsPermissionsGuide.html}HDFS Permissions Guide}} for details. + + Practically you need to manage SSO environment using Kerberos with LDAP + for Hadoop in secure mode. + +** Proxy user + + Some products such as Apache Oozie which access the services of Hadoop + on behalf of end users need to be able to impersonate end users. + You can configure proxy user using properties + <<<hadoop.proxyuser.${superuser}.hosts>>> and <<<hadoop.proxyuser.${superuser}.groups>>>. + + For example, by specifying as below in core-site.xml, + user named <<<oozie>>> accessing from any host + can impersonate any user belonging to any group. + +---- + <property> + <name>hadoop.proxyuser.oozie.hosts</name> + <value>*</value> + </property> + <property> + <name>hadoop.proxyuser.oozie.groups</name> + <value>*</value> + </property> +---- + +** Secure DataNode + + Because the data transfer protocol of DataNode + does not use the RPC framework of Hadoop, + DataNode must authenticate itself by + using privileged ports which are specified by + <<<dfs.datanode.address>>> and <<<dfs.datanode.http.address>>>. + This authentication is based on the assumption + that the attacker won't be able to get root privileges. + + When you execute <<<hdfs datanode>>> command as root, + server process binds privileged port at first, + then drops privilege and runs as the user account specified by + <<<HADOOP_SECURE_DN_USER>>>. + This startup process uses jsvc installed to <<<JSVC_HOME>>>. + You must specify <<<HADOOP_SECURE_DN_USER>>> and <<<JSVC_HOME>>> + as environment variables on start up (in hadoop-env.sh). + + +* Data confidentiality + +** Data Encryption on RPC + + The data transferred between hadoop services and clients can be encrypted on the wire. + Setting <<<hadoop.rpc.protection>>> to <<<"privacy">>> in the core-site.xml + activates data encryption. + +** Data Encryption on Block data transfer. + + You need to set <<<dfs.encrypt.data.transfer>>> to <<<"true">>> in the hdfs-site.xml + in order to activate data encryption for data transfer protocol of DataNode. + +** Data Encryption on HTTP + + Data transfer between Web-console and clients is protected by using SSL(HTTPS).
+ + +* Configuration + +** Permissions for both HDFS and local fileSystem paths + + The following table lists various paths on HDFS and local filesystems (on + all nodes) and recommended permissions: + +*-------------------+-------------------+------------------+------------------+ +|| Filesystem || Path || User:Group || Permissions | +*-------------------+-------------------+------------------+------------------+ +| local | <<>> | hdfs:hadoop | drwx------ | +*-------------------+-------------------+------------------+------------------+ +| local | <<>> | hdfs:hadoop | drwx------ | +*-------------------+-------------------+------------------+------------------+ +| local | $HADOOP_LOG_DIR | hdfs:hadoop | drwxrwxr-x | +*-------------------+-------------------+------------------+------------------+ +| local | $YARN_LOG_DIR | yarn:hadoop | drwxrwxr-x | +*-------------------+-------------------+------------------+------------------+ +| local | <<>> | yarn:hadoop | drwxr-xr-x | +*-------------------+-------------------+------------------+------------------+ +| local | <<>> | yarn:hadoop | drwxr-xr-x | +*-------------------+-------------------+------------------+------------------+ +| local | container-executor | root:hadoop | --Sr-s--- | +*-------------------+-------------------+------------------+------------------+ +| local | <<>> | root:hadoop | r-------- | +*-------------------+-------------------+------------------+------------------+ +| hdfs | / | hdfs:hadoop | drwxr-xr-x | +*-------------------+-------------------+------------------+------------------+ +| hdfs | /tmp | hdfs:hadoop | drwxrwxrwxt | +*-------------------+-------------------+------------------+------------------+ +| hdfs | /user | hdfs:hadoop | drwxr-xr-x | +*-------------------+-------------------+------------------+------------------+ +| hdfs | <<>> | yarn:hadoop | drwxrwxrwxt | +*-------------------+-------------------+------------------+------------------+ +| hdfs | <<>> | mapred:hadoop | | +| | | 
| drwxrwxrwxt | +*-------------------+-------------------+------------------+------------------+ +| hdfs | <<>> | mapred:hadoop | | +| | | | drwxr-x--- | +*-------------------+-------------------+------------------+------------------+ + +** Common Configurations + + In order to turn on RPC authentication in hadoop, + set the value of <<>> property to + <<<"kerberos">>>, and set security related settings listed below appropriately. + + The following properties should be in the <<>> of all the + nodes in the cluster. + +*-------------------------+-------------------------+------------------------+ +|| Parameter || Value || Notes | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | <<>> : No authentication. (default) \ +| | | <<>> : Enable authentication by Kerberos. | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | Enable {{{./ServiceLevelAuth.html}RPC service-level authorization}}. | +*-------------------------+-------------------------+------------------------+ +| <<>> | | +| | | : authentication only (default) \ +| | | : integrity check in addition to authentication \ +| | | : data encryption in addition to integrity | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | <<>>\ +| | <<>>\ +| | <...>\ +| | DEFAULT | +| | | The value is a string containing new line characters. +| | | See +| | | {{{http://web.mit.edu/Kerberos/krb5-latest/doc/admin/conf_files/krb5_conf.html}Kerberos documentation}} +| | | for format for . +*-------------------------+-------------------------+------------------------+ +| <<>><<<.hosts>>> | | | +| | | comma separated hosts from which access is allowed to impersonate. | +| | | <<<*>>> means wildcard. | +*-------------------------+-------------------------+------------------------+ +| <<>><<<.groups>>> | | | +| | | comma separated groups to which users impersonated by belongs. 
| +| | | <<<*>>> means wildcard. | +*-------------------------+-------------------------+------------------------+ +Configuration for <<>> + +** NameNode + +*-------------------------+-------------------------+------------------------+ +|| Parameter || Value || Notes | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | Enable HDFS block access tokens for secure operations. | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | This value is deprecated. Use dfs.http.policy | +*-------------------------+-------------------------+------------------------+ +| <<>> | or or | | +| | | HTTPS_ONLY turns off http access | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +*-------------------------+-------------------------+------------------------+ +| <<>> | <50470> | | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | Kerberos keytab file for the NameNode. | +*-------------------------+-------------------------+------------------------+ +| <<>> | nn/_HOST@REALM.TLD | | +| | | Kerberos principal name for the NameNode. | +*-------------------------+-------------------------+------------------------+ +| <<>> | host/_HOST@REALM.TLD | | +| | | HTTPS Kerberos principal name for the NameNode. | +*-------------------------+-------------------------+------------------------+ +Configuration for <<>> + +** Secondary NameNode + +*-------------------------+-------------------------+------------------------+ +|| Parameter || Value || Notes | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +*-------------------------+-------------------------+------------------------+ +| <<>> | <50470> | | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | | +| | | Kerberos keytab file for the NameNode. 
| +*-------------------------+-------------------------+------------------------+ +| <<>> | sn/_HOST@REALM.TLD | | +| | | Kerberos principal name for the Secondary NameNode. | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | host/_HOST@REALM.TLD | | +| | | HTTPS Kerberos principal name for the Secondary NameNode. | +*-------------------------+-------------------------+------------------------+ +Configuration for <<>> + +** DataNode + +*-------------------------+-------------------------+------------------------+ +|| Parameter || Value || Notes | +*-------------------------+-------------------------+------------------------+ +| <<>> | 700 | | +*-------------------------+-------------------------+------------------------+ +| <<>> | <0.0.0.0:1004> | | +| | | Secure DataNode must use privileged port | +| | | in order to assure that the server was started securely. | +| | | This means that the server must be started via jsvc. | +*-------------------------+-------------------------+------------------------+ +| <<>> | <0.0.0.0:1006> | | +| | | Secure DataNode must use privileged port | +| | | in order to assure that the server was started securely. | +| | | This means that the server must be started via jsvc. | +*-------------------------+-------------------------+------------------------+ +| <<>> | <0.0.0.0:50470> | | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | Kerberos keytab file for the DataNode. | +*-------------------------+-------------------------+------------------------+ +| <<>> | dn/_HOST@REALM.TLD | | +| | | Kerberos principal name for the DataNode. | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | host/_HOST@REALM.TLD | | +| | | HTTPS Kerberos principal name for the DataNode. 
| +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | set to <<>> when using data encryption | +*-------------------------+-------------------------+------------------------+ +Configuration for <<>> + + +** WebHDFS + +*-------------------------+-------------------------+------------------------+ +|| Parameter || Value || Notes | +*-------------------------+-------------------------+------------------------+ +| <<>> | http/_HOST@REALM.TLD | | +| | | Enable security on WebHDFS. | +*-------------------------+-------------------------+------------------------+ +| <<>> | http/_HOST@REALM.TLD | | +| | | Kerberos keytab file for the WebHDFS. | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | Kerberos principal name for WebHDFS. | +*-------------------------+-------------------------+------------------------+ +Configuration for <<>> + + +** ResourceManager + +*-------------------------+-------------------------+------------------------+ +|| Parameter || Value || Notes | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | | +| | | Kerberos keytab file for the ResourceManager. | +*-------------------------+-------------------------+------------------------+ +| <<>> | rm/_HOST@REALM.TLD | | +| | | Kerberos principal name for the ResourceManager. | +*-------------------------+-------------------------+------------------------+ +Configuration for <<>> + +** NodeManager + +*-------------------------+-------------------------+------------------------+ +|| Parameter || Value || Notes | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | Kerberos keytab file for the NodeManager. | +*-------------------------+-------------------------+------------------------+ +| <<>> | nm/_HOST@REALM.TLD | | +| | | Kerberos principal name for the NodeManager. 
| +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | <<>> | +| | | Use LinuxContainerExecutor. | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | Unix group of the NodeManager. | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | The path to the executable of Linux container executor. | +*-------------------------+-------------------------+------------------------+ +Configuration for <<>> + +** Configuration for WebAppProxy + + The <<>> provides a proxy between the web applications + exported by an application and an end user. If security is enabled + it will warn users before accessing a potentially unsafe web application. + Authentication and authorization using the proxy is handled just like + any other privileged web application. + +*-------------------------+-------------------------+------------------------+ +|| Parameter || Value || Notes | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | <<>> host:port for proxy to AM web apps. | | +| | | if this is the same as <<>>| +| | | or it is not defined then the <<>> will run the proxy| +| | | otherwise a standalone proxy server will need to be launched.| +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | | +| | | Kerberos keytab file for the WebAppProxy. | +*-------------------------+-------------------------+------------------------+ +| <<>> | wap/_HOST@REALM.TLD | | +| | | Kerberos principal name for the WebAppProxy. | +*-------------------------+-------------------------+------------------------+ +Configuration for <<>> + +** LinuxContainerExecutor + + A <<>> used by YARN framework which define how any + launched and controlled. 
+ + The following are the available in Hadoop YARN: + +*--------------------------------------+--------------------------------------+ +|| ContainerExecutor || Description | +*--------------------------------------+--------------------------------------+ +| <<>> | | +| | The default executor which YARN uses to manage container execution. | +| | The container process has the same Unix user as the NodeManager. | +*--------------------------------------+--------------------------------------+ +| <<>> | | +| | Supported only on GNU/Linux, this executor runs the containers as either the | +| | YARN user who submitted the application (when full security is enabled) or | +| | as a dedicated user (defaults to nobody) when full security is not enabled. | +| | When full security is enabled, this executor requires all user accounts to be | +| | created on the cluster nodes where the containers are launched. It uses | +| | a executable that is included in the Hadoop distribution. | +| | The NodeManager uses this executable to launch and kill containers. | +| | The setuid executable switches to the user who has submitted the | +| | application and launches or kills the containers. For maximum security, | +| | this executor sets up restricted permissions and user/group ownership of | +| | local files and directories used by the containers such as the shared | +| | objects, jars, intermediate files, log files etc. Particularly note that, | +| | because of this, except the application owner and NodeManager, no other | +| | user can access any of the local files/directories including those | +| | localized as part of the distributed cache. 
| +*--------------------------------------+--------------------------------------+ + + To build the LinuxContainerExecutor executable run: + +---- + $ mvn package -Dcontainer-executor.conf.dir=/etc/hadoop/ +---- + + The path passed in <<<-Dcontainer-executor.conf.dir>>> should be the + path on the cluster nodes where a configuration file for the setuid + executable should be located. The executable should be installed in + $HADOOP_YARN_HOME/bin. + + The executable must have specific permissions: 6050 or --Sr-s--- + permissions user-owned by (super-user) and group-owned by a + special group (e.g. <<>>) of which the NodeManager Unix user is + the group member and no ordinary application user is. If any application + user belongs to this special group, security will be compromised. This + special group name should be specified for the configuration property + <<>> in both + <<>> and <<>>. + + For example, let's say that the NodeManager is run as user who is + part of the groups users and , any of them being the primary group. + Let also be that has both and another user + (application submitter) as its members, and does not + belong to . Going by the above description, the setuid/setgid + executable should be set 6050 or --Sr-s--- with user-owner as and + group-owner as which has as its member (and not + which has also as its member besides ). + + The LinuxTaskController requires that paths including and leading up to + the directories specified in <<>> and + <<>> to be set 755 permissions as described + above in the table on permissions on directories. + + * <<>> + + The executable requires a configuration file called + <<>> to be present in the configuration + directory passed to the mvn target mentioned above. + + The configuration file must be owned by the user running NodeManager + (user <<>> in the above example), group-owned by anyone and + should have the permissions 0400 or r--------. 
+ + The executable requires the following configuration items to be present + in the <<>> file. The items should be + mentioned as simple key=value pairs, one per-line: + +*-------------------------+-------------------------+------------------------+ +|| Parameter || Value || Notes | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | | Unix group of the NodeManager. The group owner of the | +| | | binary should be this group. Should be same as the | +| | | value with which the NodeManager is configured. This configuration is | +| | | required for validating the secure access of the | +| | | binary. | +*-------------------------+-------------------------+------------------------+ +| <<>> | hdfs,yarn,mapred,bin | Banned users. | +*-------------------------+-------------------------+------------------------+ +| <<>> | foo,bar | Allowed system users. | +*-------------------------+-------------------------+------------------------+ +| <<>> | 1000 | Prevent other super-users. 
| +*-------------------------+-------------------------+------------------------+ +Configuration for <<>> + + To recap, here are the local file-system permissions required for the + various paths related to the <<>>: + +*-------------------+-------------------+------------------+------------------+ +|| Filesystem || Path || User:Group || Permissions | +*-------------------+-------------------+------------------+------------------+ +| local | container-executor | root:hadoop | --Sr-s--- | +*-------------------+-------------------+------------------+------------------+ +| local | <<>> | root:hadoop | r-------- | +*-------------------+-------------------+------------------+------------------+ +| local | <<>> | yarn:hadoop | drwxr-xr-x | +*-------------------+-------------------+------------------+------------------+ +| local | <<>> | yarn:hadoop | drwxr-xr-x | +*-------------------+-------------------+------------------+------------------+ + +** MapReduce JobHistory Server + +*-------------------------+-------------------------+------------------------+ +|| Parameter || Value || Notes | +*-------------------------+-------------------------+------------------------+ +| <<>> | | | +| | MapReduce JobHistory Server | Default port is 10020. | +*-------------------------+-------------------------+------------------------+ +| <<>> | | +| | | | +| | | Kerberos keytab file for the MapReduce JobHistory Server. | +*-------------------------+-------------------------+------------------------+ +| <<>> | jhs/_HOST@REALM.TLD | | +| | | Kerberos principal name for the MapReduce JobHistory Server. 
| +*-------------------------+-------------------------+------------------------+ +Configuration for <<>> diff --git a/hadoop-common-project/hadoop-common/src/site/apt/ServiceLevelAuth.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/ServiceLevelAuth.apt.vm index 467598d4fd..258819e110 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/ServiceLevelAuth.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/ServiceLevelAuth.apt.vm @@ -29,8 +29,10 @@ Service Level Authorization Guide Make sure Hadoop is installed, configured and setup correctly. For more information see: - * Single Node Setup for first-time users. - * Cluster Setup for large, distributed clusters. + + * {{{./SingleCluster.html}Single Node Setup}} for first-time users. + + * {{{./ClusterSetup.html}Cluster Setup}} for large, distributed clusters. * Overview diff --git a/hadoop-common-project/hadoop-common/src/site/apt/SingleCluster.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/SingleCluster.apt.vm index c86cd6defd..cf8390e461 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/SingleCluster.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/SingleCluster.apt.vm @@ -18,8 +18,6 @@ Hadoop MapReduce Next Generation - Setting up a Single Node Cluster. - \[ {{{./index.html}Go Back}} \] - %{toc|section=1|fromDepth=0} * Mapreduce Tarball @@ -32,7 +30,8 @@ $ mvn clean install -DskipTests $ cd hadoop-mapreduce-project $ mvn clean install assembly:assembly -Pnative +---+ - <> You will need protoc 2.5.0 installed. + <> You will need {{{http://code.google.com/p/protobuf}protoc 2.5.0}} + installed. To ignore the native builds in mapreduce you can omit the <<<-Pnative>>> argument for maven. The tarball should be available in <<>> directory. 
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestVersionUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestVersionUtil.java index f01ae2f73d..b1737dcb52 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestVersionUtil.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestVersionUtil.java @@ -28,10 +28,30 @@ public class TestVersionUtil { // Equal versions are equal. assertEquals(0, VersionUtil.compareVersions("2.0.0", "2.0.0")); assertEquals(0, VersionUtil.compareVersions("2.0.0a", "2.0.0a")); - assertEquals(0, VersionUtil.compareVersions("1", "1")); assertEquals(0, VersionUtil.compareVersions( "2.0.0-SNAPSHOT", "2.0.0-SNAPSHOT")); - + + assertEquals(0, VersionUtil.compareVersions("1", "1")); + assertEquals(0, VersionUtil.compareVersions("1", "1.0")); + assertEquals(0, VersionUtil.compareVersions("1", "1.0.0")); + + assertEquals(0, VersionUtil.compareVersions("1.0", "1")); + assertEquals(0, VersionUtil.compareVersions("1.0", "1.0")); + assertEquals(0, VersionUtil.compareVersions("1.0", "1.0.0")); + + assertEquals(0, VersionUtil.compareVersions("1.0.0", "1")); + assertEquals(0, VersionUtil.compareVersions("1.0.0", "1.0")); + assertEquals(0, VersionUtil.compareVersions("1.0.0", "1.0.0")); + + assertEquals(0, VersionUtil.compareVersions("1.0.0-alpha-1", "1.0.0-a1")); + assertEquals(0, VersionUtil.compareVersions("1.0.0-alpha-2", "1.0.0-a2")); + assertEquals(0, VersionUtil.compareVersions("1.0.0-alpha1", "1.0.0-alpha-1")); + + assertEquals(0, VersionUtil.compareVersions("1a0", "1.0.0-alpha-0")); + assertEquals(0, VersionUtil.compareVersions("1a0", "1-a0")); + assertEquals(0, VersionUtil.compareVersions("1.a0", "1-a0")); + assertEquals(0, VersionUtil.compareVersions("1.a0", "1.0.0-alpha-0")); + // Assert that lower versions are lower, and higher versions are higher. 
assertExpectedValues("1", "2.0.0"); assertExpectedValues("1.0.0", "2"); @@ -51,15 +71,27 @@ public class TestVersionUtil { assertExpectedValues("1.0.2a", "1.0.2ab"); assertExpectedValues("1.0.0a1", "1.0.0a2"); assertExpectedValues("1.0.0a2", "1.0.0a10"); + // The 'a' in "1.a" is not followed by digit, thus not treated as "alpha", + // and treated larger than "1.0", per maven's ComparableVersion class + // implementation. assertExpectedValues("1.0", "1.a"); - assertExpectedValues("1.0", "1.a0"); + //The 'a' in "1.a0" is followed by digit, thus treated as "alpha-" + assertExpectedValues("1.a0", "1.0"); + assertExpectedValues("1a0", "1.0"); + assertExpectedValues("1.0.1-alpha-1", "1.0.1-alpha-2"); + assertExpectedValues("1.0.1-beta-1", "1.0.1-beta-2"); // Snapshot builds precede their eventual releases. assertExpectedValues("1.0-SNAPSHOT", "1.0"); - assertExpectedValues("1.0", "1.0.0-SNAPSHOT"); + assertExpectedValues("1.0.0-SNAPSHOT", "1.0"); assertExpectedValues("1.0.0-SNAPSHOT", "1.0.0"); assertExpectedValues("1.0.0", "1.0.1-SNAPSHOT"); assertExpectedValues("1.0.1-SNAPSHOT", "1.0.1"); + assertExpectedValues("1.0.1-SNAPSHOT", "1.0.2"); + + assertExpectedValues("1.0.1-alpha-1", "1.0.1-SNAPSHOT"); + assertExpectedValues("1.0.1-beta-1", "1.0.1-SNAPSHOT"); + assertExpectedValues("1.0.1-beta-2", "1.0.1-SNAPSHOT"); } private static void assertExpectedValues(String lower, String higher) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 09eaf6bd6e..03a25891c4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -504,6 +504,9 @@ Release 2.4.0 - UNRELEASED HDFS-5788. listLocatedStatus response can be very large. (Nathan Roberts via kihwal) + HDFS-5781. Use an array to record the mapping between FSEditLogOpCode and + the corresponding byte value. (jing9) + OPTIMIZATIONS HDFS-5239. 
Allow FSNamesystem lock fairness to be configurable (daryn) @@ -979,6 +982,9 @@ Release 2.3.0 - UNRELEASED HDFS-5677. Need error checking for HA cluster configuration. (Vincent Sheffer via cos) + HDFS-5825. Use FileUtils.copyFile() to implement DFSTestUtils.copyFile(). + (Haohui Mai via Arpit Agarwal) + OPTIMIZATIONS BUG FIXES @@ -1142,6 +1148,15 @@ Release 2.3.0 - UNRELEASED HDFS-5343. When cat command is issued on snapshot files getting unexpected result. (Sathish via umamahesh) + HDFS-5297. Fix dead links in HDFS site documents. (Akira Ajisaka via + Arpit Agarwal) + + HDFS-5830. WebHdfsFileSystem.getFileBlockLocations throws + IllegalArgumentException when accessing another cluster. (Yongjun Zhang via + Colin Patrick McCabe) + + HDFS-5833. Fix SecondaryNameNode javadoc. (Bangtao Zhou via Arpit Agarwal) + Release 2.2.0 - 2013-10-13 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java index 0e6dd12546..bb98db46f5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java @@ -98,9 +98,8 @@ public class LocatedBlock { } this.storageIDs = storageIDs; this.storageTypes = storageTypes; - Preconditions.checkArgument(cachedLocs != null, - "cachedLocs should not be null, use a different constructor"); - if (cachedLocs.length == 0) { + + if (cachedLocs == null || cachedLocs.length == 0) { this.cachedLocs = EMPTY_LOCS; } else { this.cachedLocs = cachedLocs; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java index c6df970af7..1c60156c70 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java @@ -69,7 +69,7 @@ public enum FSEditLogOpCodes { OP_MODIFY_CACHE_DIRECTIVE ((byte) 39), OP_UPGRADE_MARKER ((byte) 40), - // Note that fromByte(..) depends on OP_INVALID being at the last position. + // Note that the current range of the valid OP code is 0~127 OP_INVALID ((byte) -1); private final byte opCode; @@ -92,7 +92,22 @@ public enum FSEditLogOpCodes { return opCode; } - private static final FSEditLogOpCodes[] VALUES = FSEditLogOpCodes.values(); + private static FSEditLogOpCodes[] VALUES; + + static { + byte max = 0; + for (FSEditLogOpCodes code : FSEditLogOpCodes.values()) { + if (code.getOpCode() > max) { + max = code.getOpCode(); + } + } + VALUES = new FSEditLogOpCodes[max + 1]; + for (FSEditLogOpCodes code : FSEditLogOpCodes.values()) { + if (code.getOpCode() >= 0) { + VALUES[code.getOpCode()] = code; + } + } + } /** * Converts byte to FSEditLogOpCodes enum value @@ -101,12 +116,9 @@ public enum FSEditLogOpCodes { * @return enum with byte value of opCode */ public static FSEditLogOpCodes fromByte(byte opCode) { - if (opCode == -1) { - return OP_INVALID; - } - if (opCode >= 0 && opCode < OP_INVALID.ordinal()) { + if (opCode >= 0 && opCode < VALUES.length) { return VALUES[opCode]; } - return null; + return opCode == -1 ? 
OP_INVALID : null; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java index c3d9b93efe..a593afc570 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java @@ -90,7 +90,7 @@ import com.google.common.collect.ImmutableList; * The Secondary NameNode is a daemon that periodically wakes * up (determined by the schedule specified in the configuration), * triggers a periodic checkpoint and then goes back to sleep. - * The Secondary NameNode uses the ClientProtocol to talk to the + * The Secondary NameNode uses the NamenodeProtocol to talk to the * primary NameNode. * **********************************************************/ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/Federation.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/Federation.apt.vm index f2ce08c631..b949ff2ec7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/Federation.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/Federation.apt.vm @@ -19,8 +19,6 @@ HDFS Federation - \[ {{{./index.html}Go Back}} \] - %{toc|section=1|fromDepth=0} This guide provides an overview of the HDFS Federation feature and diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithNFS.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithNFS.apt.vm index efa3f931bb..f250da7f73 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithNFS.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithNFS.apt.vm @@ -18,8 +18,6 @@ HDFS High Availability - \[ {{{./index.html}Go Back}} \] - %{toc|section=1|fromDepth=0} * {Purpose} diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm index eccd705cd5..31dccb29e4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSHighAvailabilityWithQJM.apt.vm @@ -18,8 +18,6 @@ HDFS High Availability Using the Quorum Journal Manager - \[ {{{./index.html}Go Back}} \] - %{toc|section=1|fromDepth=0} * {Purpose} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsEditsViewer.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsEditsViewer.apt.vm index 22eaa639b3..8c2db1b243 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsEditsViewer.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsEditsViewer.apt.vm @@ -20,8 +20,6 @@ Offline Edits Viewer Guide - \[ {{{./index.html}Go Back}} \] - %{toc|section=1|fromDepth=0} * Overview diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsImageViewer.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsImageViewer.apt.vm index 3a1e60c46a..2665a4fbb8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsImageViewer.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsImageViewer.apt.vm @@ -18,8 +18,6 @@ Offline Image Viewer Guide - \[ {{{./index.html}Go Back}} \] - %{toc|section=1|fromDepth=0} * Overview @@ -64,9 +62,9 @@ Offline Image Viewer Guide but no data recorded. The default record delimiter is a tab, but this may be changed via the -delimiter command line argument. This processor is designed to create output that is easily analyzed by - other tools, such as [36]Apache Pig. See the [37]Analyzing Results - section for further information on using this processor to analyze - the contents of fsimage files. + other tools, such as {{{http://pig.apache.org}Apache Pig}}. 
See + the {{Analyzing Results}} section for further information on using + this processor to analyze the contents of fsimage files. [[4]] XML creates an XML document of the fsimage and includes all of the information within the fsimage, similar to the lsr processor. The diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsPermissionsGuide.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsPermissionsGuide.apt.vm index d2d815d65a..7ab08ced7a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsPermissionsGuide.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsPermissionsGuide.apt.vm @@ -18,8 +18,6 @@ HDFS Permissions Guide - \[ {{{./index.html}Go Back}} \] - %{toc|section=1|fromDepth=0} * Overview @@ -55,8 +53,10 @@ HDFS Permissions Guide * If the user name matches the owner of foo, then the owner permissions are tested; + * Else if the group of foo matches any of member of the groups list, then the group permissions are tested; + * Otherwise the other permissions of foo are tested. If a permissions check fails, the client operation fails. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsQuotaAdminGuide.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsQuotaAdminGuide.apt.vm index 317fe7d46c..0821946609 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsQuotaAdminGuide.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsQuotaAdminGuide.apt.vm @@ -18,8 +18,6 @@ HDFS Quotas Guide - \[ {{{./index.html}Go Back}} \] - %{toc|section=1|fromDepth=0} * Overview diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsUserGuide.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsUserGuide.apt.vm index b9d1c637a3..b84da5991b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsUserGuide.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsUserGuide.apt.vm @@ -108,9 +108,11 @@ HDFS Users Guide The following documents describe how to install and set up a Hadoop cluster: - * {{Single Node Setup}} for first-time users. + * {{{../hadoop-common/SingleCluster.html}Single Node Setup}} + for first-time users. - * {{Cluster Setup}} for large, distributed clusters. + * {{{../hadoop-common/ClusterSetup.html}Cluster Setup}} + for large, distributed clusters. The rest of this document assumes the user is able to set up and run a HDFS with at least one DataNode. For the purpose of this document, both @@ -136,7 +138,8 @@ HDFS Users Guide for a command. These commands support most of the normal files system operations like copying files, changing file permissions, etc. It also supports a few HDFS specific operations like changing replication of - files. For more information see {{{File System Shell Guide}}}. + files. For more information see {{{../hadoop-common/FileSystemShell.html} + File System Shell Guide}}. ** DFSAdmin Command @@ -169,7 +172,7 @@ HDFS Users Guide of racks and datanodes attached to the tracks as viewed by the NameNode. - For command usage, see {{{dfsadmin}}}. 
+ For command usage, see {{{../hadoop-common/CommandsManual.html#dfsadmin}dfsadmin}}. * Secondary NameNode @@ -203,7 +206,8 @@ HDFS Users Guide So that the check pointed image is always ready to be read by the primary NameNode if necessary. - For command usage, see {{{secondarynamenode}}}. + For command usage, + see {{{../hadoop-common/CommandsManual.html#secondarynamenode}secondarynamenode}}. * Checkpoint Node @@ -245,7 +249,7 @@ HDFS Users Guide Multiple checkpoint nodes may be specified in the cluster configuration file. - For command usage, see {{{namenode}}}. + For command usage, see {{{../hadoop-common/CommandsManual.html#namenode}namenode}}. * Backup Node @@ -287,7 +291,7 @@ HDFS Users Guide For a complete discussion of the motivation behind the creation of the Backup node and Checkpoint node, see {{{https://issues.apache.org/jira/browse/HADOOP-4539}HADOOP-4539}}. - For command usage, see {{{namenode}}}. + For command usage, see {{{../hadoop-common/CommandsManual.html#namenode}namenode}}. * Import Checkpoint @@ -310,7 +314,7 @@ HDFS Users Guide verifies that the image in <<>> is consistent, but does not modify it in any way. - For command usage, see {{{namenode}}}. + For command usage, see {{{../hadoop-common/CommandsManual.html#namenode}namenode}}. * Rebalancer @@ -337,7 +341,7 @@ HDFS Users Guide A brief administrator's guide for rebalancer as a PDF is attached to {{{https://issues.apache.org/jira/browse/HADOOP-1652}HADOOP-1652}}. - For command usage, see {{{balancer}}}. + For command usage, see {{{../hadoop-common/CommandsManual.html#balancer}balancer}}. * Rack Awareness @@ -379,8 +383,9 @@ HDFS Users Guide most of the recoverable failures. By default fsck ignores open files but provides an option to select all files during reporting. The HDFS fsck command is not a Hadoop shell command. It can be run as - <<>>. For command usage, see {{{fsck}}}. fsck can be run on the - whole file system or on a subset of files. + <<>>. 
For command usage, see + {{{../hadoop-common/CommandsManual.html#fsck}fsck}}. fsck can be run on + the whole file system or on a subset of files. * fetchdt @@ -393,7 +398,8 @@ HDFS Users Guide command. It can be run as <<>>. After you got the token you can run an HDFS command without having Kerberos tickets, by pointing <<>> environmental variable to the - delegation token file. For command usage, see {{{fetchdt}}} command. + delegation token file. For command usage, see + {{{../hadoop-common/CommandsManual.html#fetchdt}fetchdt}} command. * Recovery Mode @@ -427,10 +433,11 @@ HDFS Users Guide let alone to restart HDFS from scratch. HDFS allows administrators to go back to earlier version of Hadoop and rollback the cluster to the state it was in before the upgrade. HDFS upgrade is described in more - detail in {{{Hadoop Upgrade}}} Wiki page. HDFS can have one such backup at a - time. Before upgrading, administrators need to remove existing backup - using bin/hadoop dfsadmin <<<-finalizeUpgrade>>> command. The following - briefly describes the typical upgrade procedure: + detail in {{{http://wiki.apache.org/hadoop/Hadoop_Upgrade}Hadoop Upgrade}} + Wiki page. HDFS can have one such backup at a time. Before upgrading, + administrators need to remove existing backup using bin/hadoop dfsadmin + <<<-finalizeUpgrade>>> command. The following briefly describes the + typical upgrade procedure: * Before upgrading Hadoop software, finalize if there an existing backup. <<>> status can tell if the cluster @@ -450,7 +457,7 @@ HDFS Users Guide * stop the cluster and distribute earlier version of Hadoop. - * start the cluster with rollback option. (<<>>). + * start the cluster with rollback option. (<<>>). * File Permissions and Security @@ -465,14 +472,15 @@ HDFS Users Guide * Scalability Hadoop currently runs on clusters with thousands of nodes. The - {{{PoweredBy}}} Wiki page lists some of the organizations that deploy Hadoop - on large clusters. 
HDFS has one NameNode for each cluster. Currently - the total memory available on NameNode is the primary scalability - limitation. On very large clusters, increasing average size of files - stored in HDFS helps with increasing cluster size without increasing - memory requirements on NameNode. The default configuration may not - suite very large clustes. The {{{FAQ}}} Wiki page lists suggested - configuration improvements for large Hadoop clusters. + {{{http://wiki.apache.org/hadoop/PoweredBy}PoweredBy}} Wiki page lists + some of the organizations that deploy Hadoop on large clusters. + HDFS has one NameNode for each cluster. Currently the total memory + available on NameNode is the primary scalability limitation. + On very large clusters, increasing average size of files stored in + HDFS helps with increasing cluster size without increasing memory + requirements on NameNode. The default configuration may not suit + very large clusters. The {{{http://wiki.apache.org/hadoop/FAQ}FAQ}} + Wiki page lists suggested configuration improvements for large Hadoop clusters. * Related Documentation @@ -481,19 +489,22 @@ HDFS Users Guide documentation about Hadoop and HDFS. The following list is a starting point for further exploration: - * {{{Hadoop Site}}}: The home page for the Apache Hadoop site. + * {{{http://hadoop.apache.org}Hadoop Site}}: The home page for + the Apache Hadoop site. - * {{{Hadoop Wiki}}}: The home page (FrontPage) for the Hadoop Wiki. Unlike + * {{{http://wiki.apache.org/hadoop/FrontPage}Hadoop Wiki}}: + The home page (FrontPage) for the Hadoop Wiki. Unlike the released documentation, which is part of Hadoop source tree, Hadoop Wiki is regularly edited by Hadoop Community. - * {{{FAQ}}}: The FAQ Wiki page. + * {{{http://wiki.apache.org/hadoop/FAQ}FAQ}}: The FAQ Wiki page. - * {{{Hadoop JavaDoc API}}}. + * {{{../../api/index.html}Hadoop JavaDoc API}}. - * {{{Hadoop User Mailing List}}}: core-user[at]hadoop.apache.org. 
+ * Hadoop User Mailing List: user[at]hadoop.apache.org. - * Explore {{{src/hdfs/hdfs-default.xml}}}. It includes brief description of - most of the configuration variables available. + * Explore {{{./hdfs-default.xml}hdfs-default.xml}}. It includes + brief description of most of the configuration variables available. - * {{{Hadoop Commands Guide}}}: Hadoop commands usage. + * {{{../hadoop-common/CommandsManual.html}Hadoop Commands Guide}}: + Hadoop commands usage. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/Hftp.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/Hftp.apt.vm index 39985357f9..bab36bfd10 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/Hftp.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/Hftp.apt.vm @@ -18,8 +18,6 @@ HFTP Guide - \[ {{{./index.html}Go Back}} \] - %{toc|section=1|fromDepth=0} * Introduction diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ShortCircuitLocalReads.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ShortCircuitLocalReads.apt.vm index daa4c2fc36..6670d8a1a6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ShortCircuitLocalReads.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ShortCircuitLocalReads.apt.vm @@ -19,8 +19,6 @@ HDFS Short-Circuit Local Reads - \[ {{{./index.html}Go Back}} \] - %{toc|section=1|fromDepth=0} * {Background} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/WebHDFS.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/WebHDFS.apt.vm index 71063c3a69..773b99690e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/WebHDFS.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/WebHDFS.apt.vm @@ -18,8 +18,6 @@ WebHDFS REST API - \[ {{{./index.html}Go Back}} \] - %{toc|section=1|fromDepth=0} * {Document Conventions} @@ -54,7 +52,7 @@ WebHDFS REST API * {{{Status of a File/Directory}<<>>}} (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.getFileStatus) - * {{<<>>}} + * {{{List a Directory}<<>>}} (see 
{{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.listStatus) * {{{Get Content Summary of a Directory}<<>>}} @@ -109,7 +107,7 @@ WebHDFS REST API * {{{Append to a File}<<>>}} (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.append) - * {{{Concatenate Files}<<>>}} + * {{{Concat File(s)}<<>>}} (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.concat) * HTTP DELETE @@ -871,7 +869,7 @@ Content-Length: 0 * {Error Responses} When an operation fails, the server may throw an exception. - The JSON schema of error responses is defined in {{<<>> JSON schema}}. + The JSON schema of error responses is defined in {{{RemoteException JSON Schema}}}. The table below shows the mapping from exceptions to HTTP response codes. ** {HTTP Response Codes} @@ -1119,7 +1117,7 @@ Transfer-Encoding: chunked See also: {{{FileStatus Properties}<<>> Properties}}, {{{Status of a File/Directory}<<>>}}, - {{{../../api/org/apache/hadoop/fs/FileStatus}FileStatus}} + {{{../../api/org/apache/hadoop/fs/FileStatus.html}FileStatus}} *** {FileStatus Properties} @@ -1232,7 +1230,7 @@ var fileStatusProperties = See also: {{{FileStatus Properties}<<>> Properties}}, {{{List a Directory}<<>>}}, - {{{../../api/org/apache/hadoop/fs/FileStatus}FileStatus}} + {{{../../api/org/apache/hadoop/fs/FileStatus.html}FileStatus}} ** {Long JSON Schema} @@ -1275,7 +1273,7 @@ var fileStatusProperties = See also: {{{Get Home Directory}<<>>}}, - {{{../../api/org/apache/hadoop/fs/Path}Path}} + {{{../../api/org/apache/hadoop/fs/Path.html}Path}} ** {RemoteException JSON Schema} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java index e9cb6cca9b..919377a3c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java @@ 
-20,6 +20,8 @@ package org.apache.hadoop.hdfs; import com.google.common.base.Charsets; import com.google.common.base.Joiner; + +import org.apache.commons.io.FileUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -27,7 +29,6 @@ import org.apache.hadoop.fs.*; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem.Statistics; import org.apache.hadoop.fs.Options.Rename; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.MiniDFSCluster.NameNodeInfo; import org.apache.hadoop.hdfs.client.HdfsDataInputStream; @@ -889,21 +890,7 @@ public class DFSTestUtil { /** Copy one file's contents into the other **/ public static void copyFile(File src, File dest) throws IOException { - InputStream in = null; - OutputStream out = null; - - try { - in = new FileInputStream(src); - out = new FileOutputStream(dest); - - byte [] b = new byte[1024]; - while( in.read(b) > 0 ) { - out.write(b); - } - } finally { - if(in != null) in.close(); - if(out != null) out.close(); - } + FileUtils.copyFile(src, dest); } public static class Builder { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index cf72e7920e..1d5728dbd2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -118,6 +118,20 @@ public class TestDFSUtil { assertEquals(0, bs.length); } + /** + * Test constructing LocatedBlock with null cachedLocs + */ + @Test + public void testLocatedBlockConstructorWithNullCachedLocs() { + DatanodeInfo d = DFSTestUtil.getLocalDatanodeInfo(); + DatanodeInfo[] ds = new DatanodeInfo[1]; + ds[0] = d; + + ExtendedBlock b1 = new ExtendedBlock("bpid", 1, 1, 
1); + LocatedBlock l1 = new LocatedBlock(b1, ds, null, null, 0, false, null); + final DatanodeInfo[] cachedLocs = l1.getCachedLocations(); + assertTrue(cachedLocs.length == 0); + } private Configuration setupAddress(String key) { HdfsConfiguration conf = new HdfsConfiguration(); diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index de24038c23..3fce9e270e 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -59,6 +59,7 @@ +