Merging r1548329 through r1549625 from trunk to branch HDFS-2832

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2832@1549626 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Arpit Agarwal 2013-12-09 17:38:20 +00:00
commit 43c33491fc
100 changed files with 1769 additions and 676 deletions

View File

@ -487,6 +487,9 @@ Release 2.3.0 - UNRELEASED
OPTIMIZATIONS
HADOOP-10142. Avoid groups lookup for unprivileged users such as "dr.who"
(vinay via cmccabe)
BUG FIXES
HADOOP-10028. Malformed ssl-server.xml.example. (Haohui Mai via jing9)

View File

@ -204,6 +204,14 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
public static final String DEFAULT_HADOOP_HTTP_STATIC_USER =
"dr.who";
/**
* User->groups static mapping to override the groups lookup
*/
public static final String HADOOP_USER_GROUP_STATIC_OVERRIDES =
"hadoop.user.group.static.mapping.overrides";
public static final String HADOOP_USER_GROUP_STATIC_OVERRIDES_DEFAULT =
"dr.who=;";
/** Enable/Disable aliases serving from jetty */
public static final String HADOOP_JETTY_LOGS_SERVE_ALIASES =
"hadoop.jetty.logs.serve.aliases";

View File

@ -18,15 +18,20 @@
package org.apache.hadoop.security;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
import org.apache.commons.logging.Log;
@ -49,6 +54,8 @@ public class Groups {
private final Map<String, CachedGroups> userToGroupsMap =
new ConcurrentHashMap<String, CachedGroups>();
private final Map<String, List<String>> staticUserToGroupsMap =
new HashMap<String, List<String>>();
private final long cacheTimeout;
private final long warningDeltaMs;
@ -66,12 +73,43 @@ public Groups(Configuration conf) {
warningDeltaMs =
conf.getLong(CommonConfigurationKeys.HADOOP_SECURITY_GROUPS_CACHE_WARN_AFTER_MS,
CommonConfigurationKeys.HADOOP_SECURITY_GROUPS_CACHE_WARN_AFTER_MS_DEFAULT);
parseStaticMapping(conf);
if(LOG.isDebugEnabled())
LOG.debug("Group mapping impl=" + impl.getClass().getName() +
"; cacheTimeout=" + cacheTimeout + "; warningDeltaMs=" +
warningDeltaMs);
}
/*
 * Reads the hadoop.user.group.static.mapping.overrides setting and records
 * every "user=group1,group2" entry (entries are separated by ';') in
 * staticUserToGroupsMap. An entry without any group yields an empty list.
 */
private void parseStaticMapping(Configuration conf) {
  final String overridesKey =
      CommonConfigurationKeys.HADOOP_USER_GROUP_STATIC_OVERRIDES;
  final String rawOverrides = conf.get(overridesKey,
      CommonConfigurationKeys.HADOOP_USER_GROUP_STATIC_OVERRIDES_DEFAULT);
  for (String entry : StringUtils.getStringCollection(rawOverrides, ";")) {
    final Collection<String> parts = StringUtils.getStringCollection(entry, "=");
    final int partCount = parts.size();
    // An entry must be "user" or "user=groups"; anything else is rejected.
    if (partCount != 1 && partCount != 2) {
      throw new HadoopIllegalArgumentException("Configuration "
          + overridesKey + " is invalid");
    }
    final String[] fields = parts.toArray(new String[partCount]);
    final List<String> groups;
    if (partCount == 2) {
      // The group list itself is comma separated (default delimiter).
      groups = (List<String>) StringUtils.getStringCollection(fields[1]);
    } else {
      groups = Collections.emptyList();
    }
    staticUserToGroupsMap.put(fields[0], groups);
  }
}
/**
* Get the group memberships of a given user.
@ -80,6 +118,11 @@ public Groups(Configuration conf) {
* @throws IOException
*/
public List<String> getGroups(String user) throws IOException {
// No need to look up groups for static users
List<String> staticMapping = staticUserToGroupsMap.get(user);
if (staticMapping != null) {
return staticMapping;
}
// Return cached value if available
CachedGroups groups = userToGroupsMap.get(user);
long startMs = Time.monotonicNow();

View File

@ -325,10 +325,24 @@ public static String[] getStrings(String str){
* @return an <code>ArrayList</code> of string values
*/
public static Collection<String> getStringCollection(String str){
  // Comma is the default delimiter; the two-argument overload does the work.
  return getStringCollection(str, ",");
}
/**
* Returns a collection of strings.
*
* @param str
* String to parse
* @param delim
* delimiter to separate the values
* @return Collection of parsed elements.
*/
public static Collection<String> getStringCollection(String str, String delim) {
List<String> values = new ArrayList<String>();
if (str == null)
return values;
StringTokenizer tokenizer = new StringTokenizer (str,",");
StringTokenizer tokenizer = new StringTokenizer(str, delim);
values = new ArrayList<String>();
while (tokenizer.hasMoreTokens()) {
values.add(tokenizer.nextToken());

View File

@ -1261,4 +1261,18 @@
Specify the port number used by Hadoop mount daemon.
</description>
</property>
<property>
<name>hadoop.user.group.static.mapping.overrides</name>
<value>dr.who=;</value>
<description>
Static mapping of users to groups. For each user listed here, this mapping
overrides any groups available in the system for that user. In other words,
no groups look-up is performed for these users; the groups mapped in this
configuration are used instead.
The mapping should be in this format:
user1=group1,group2;user2=;user3=group2;
The default, "dr.who=;", treats "dr.who" as a user without groups.
</description>
</property>
</configuration>

View File

@ -19,14 +19,17 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.fail;
import org.apache.commons.logging.Log;
@ -40,10 +43,12 @@
public class TestGroupsCaching {
public static final Log LOG = LogFactory.getLog(TestGroupsCaching.class);
private static Configuration conf = new Configuration();
private static String[] myGroups = {"grp1", "grp2"};
private Configuration conf;
static {
@Before
public void setup() {
conf = new Configuration();
conf.setClass(CommonConfigurationKeys.HADOOP_SECURITY_GROUP_MAPPING,
FakeGroupMapping.class,
ShellBasedUnixGroupsMapping.class);
@ -88,7 +93,7 @@ public static void addToBlackList(String user) throws IOException {
}
@Test
public void TestGroupsCaching() throws Exception {
public void testGroupsCaching() throws Exception {
Groups groups = new Groups(conf);
groups.cacheGroupsAdd(Arrays.asList(myGroups));
groups.refresh();
@ -117,4 +122,45 @@ public void TestGroupsCaching() throws Exception {
FakeGroupMapping.clearBlackList();
assertTrue(groups.getGroups("user1").size() == 2);
}
/**
 * Group mapping that records, in a static flag, whether
 * {@link #getGroups(String)} was ever called, and otherwise defers to
 * {@link FakeGroupMapping}.
 */
public static class FakeunPrivilegedGroupMapping extends FakeGroupMapping {
  /** Becomes true as soon as a lookup reaches this mapping. */
  private static boolean invoked;

  @Override
  public List<String> getGroups(String user) throws IOException {
    // Raise the flag before delegating so it is set even if the parent throws.
    FakeunPrivilegedGroupMapping.invoked = true;
    List<String> delegated = super.getGroups(user);
    return delegated;
  }
}
/*
 * Users listed in the static overrides must get their configured groups
 * without the group mapping implementation ever being consulted.
 */
@Test
public void testGroupLookupForStaticUsers() throws Exception {
  conf.setClass(CommonConfigurationKeys.HADOOP_SECURITY_GROUP_MAPPING,
      FakeunPrivilegedGroupMapping.class, ShellBasedUnixGroupsMapping.class);
  conf.set(CommonConfigurationKeys.HADOOP_USER_GROUP_STATIC_OVERRIDES,
      "me=;user1=group1;user2=group1,group2");
  final Groups staticGroups = new Groups(conf);

  // "me" is mapped to no groups at all.
  final List<String> groupsOfMe = staticGroups.getGroups("me");
  assertTrue("non-empty groups for static user", groupsOfMe.isEmpty());
  assertFalse("group lookup done for static user",
      FakeunPrivilegedGroupMapping.invoked);

  // "user1" is mapped to exactly one group.
  final List<String> oneGroup = Arrays.asList("group1");
  FakeunPrivilegedGroupMapping.invoked = false;
  final List<String> groupsOfUser1 = staticGroups.getGroups("user1");
  assertTrue("groups not correct", oneGroup.equals(groupsOfUser1));
  assertFalse("group lookup done for unprivileged user",
      FakeunPrivilegedGroupMapping.invoked);

  // "user2" is mapped to two groups, in configuration order.
  final List<String> twoGroups = Arrays.asList("group1", "group2");
  FakeunPrivilegedGroupMapping.invoked = false;
  final List<String> groupsOfUser2 = staticGroups.getGroups("user2");
  assertTrue("groups not correct", twoGroups.equals(groupsOfUser2));
  assertFalse("group lookup done for unprivileged user",
      FakeunPrivilegedGroupMapping.invoked);
}
}

View File

@ -233,6 +233,12 @@ Trunk (Unreleased)
HDFS-5630. Hook up cache directive and pool usage statistics. (wang)
HDFS-5312. Generate HTTP / HTTPS URL in DFSUtil#getInfoServer() based on the
configured http policy. (Haohui Mai via jing9)
HDFS-5554. Flatten INodeFile hierarchy: Replace INodeFileWithSnapshot with
FileWithSnapshotFeature. (jing9 via szetszwo)
OPTIMIZATIONS
HDFS-5349. DNA_CACHE and DNA_UNCACHE should be by blockId only. (cmccabe)
@ -576,6 +582,11 @@ Release 2.4.0 - UNRELEASED
HDFS-5581. NameNodeFsck should use only one instance of
BlockPlacementPolicy. (vinay via cmccabe)
HDFS-5633. Improve OfflineImageViewer to use less memory. (jing9)
HDFS-4983. Numeric usernames do not work with WebHDFS FS. (Yongjun Zhang via
jing9)
OPTIMIZATIONS
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)
@ -784,6 +795,12 @@ Release 2.3.0 - UNRELEASED
HDFS-5587. add debug information when NFS fails to start with duplicate user
or group names (brandonli)
HDFS-5590. Block ID and generation stamp may be reused when persistBlocks is
set to false. (jing9)
HDFS-5353. Short circuit reads fail when dfs.encrypt.data.transfer is
enabled. (Colin Patrick McCabe via jing9)
Release 2.2.0 - 2013-10-13
INCOMPATIBLE CHANGES

View File

@ -164,10 +164,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final int DFS_NAMENODE_REPLICATION_STREAMS_HARD_LIMIT_DEFAULT = 4;
public static final String DFS_WEBHDFS_ENABLED_KEY = "dfs.webhdfs.enabled";
public static final boolean DFS_WEBHDFS_ENABLED_DEFAULT = true;
public static final String DFS_WEBHDFS_USER_PATTERN_KEY = "dfs.webhdfs.user.provider.user.pattern";
public static final String DFS_WEBHDFS_USER_PATTERN_DEFAULT = "^[A-Za-z_][A-Za-z0-9._-]*[$]?$";
public static final String DFS_PERMISSIONS_ENABLED_KEY = "dfs.permissions.enabled";
public static final boolean DFS_PERMISSIONS_ENABLED_DEFAULT = true;
public static final String DFS_PERSIST_BLOCKS_KEY = "dfs.persist.blocks";
public static final boolean DFS_PERSIST_BLOCKS_DEFAULT = false;
public static final String DFS_PERMISSIONS_SUPERUSERGROUP_KEY = "dfs.permissions.superusergroup";
public static final String DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT = "supergroup";
public static final String DFS_ADMIN = "dfs.cluster.administrators";

View File

@ -92,6 +92,7 @@
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.ToolRunner;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
@ -975,39 +976,71 @@ public static String getNameServiceIdFromAddress(final Configuration conf,
* given namenode rpc address.
* @param conf
* @param namenodeAddr - namenode RPC address
* @param httpsAddress -If true, and if security is enabled, returns server
* https address. If false, returns server http address.
* @param scheme - the scheme (http / https)
* @return server http or https address
* @throws IOException
*/
public static String getInfoServer(InetSocketAddress namenodeAddr,
Configuration conf, boolean httpsAddress) throws IOException {
boolean securityOn = UserGroupInformation.isSecurityEnabled();
String httpAddressKey = (securityOn && httpsAddress) ?
DFS_NAMENODE_HTTPS_ADDRESS_KEY : DFS_NAMENODE_HTTP_ADDRESS_KEY;
String httpAddressDefault = (securityOn && httpsAddress) ?
DFS_NAMENODE_HTTPS_ADDRESS_DEFAULT : DFS_NAMENODE_HTTP_ADDRESS_DEFAULT;
String suffixes[];
public static URI getInfoServer(InetSocketAddress namenodeAddr,
Configuration conf, String scheme) throws IOException {
String[] suffixes = null;
if (namenodeAddr != null) {
// if non-default namenode, try reverse look up
// the nameServiceID if it is available
suffixes = getSuffixIDs(conf, namenodeAddr,
DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY,
DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY);
} else {
suffixes = new String[2];
}
String configuredInfoAddr = getSuffixedConf(conf, httpAddressKey,
httpAddressDefault, suffixes);
String authority;
if ("http".equals(scheme)) {
authority = getSuffixedConf(conf, DFS_NAMENODE_HTTP_ADDRESS_KEY,
DFS_NAMENODE_HTTP_ADDRESS_DEFAULT, suffixes);
} else if ("https".equals(scheme)) {
authority = getSuffixedConf(conf, DFS_NAMENODE_HTTPS_ADDRESS_KEY,
DFS_NAMENODE_HTTPS_ADDRESS_DEFAULT, suffixes);
} else {
throw new IllegalArgumentException("Invalid scheme:" + scheme);
}
if (namenodeAddr != null) {
return substituteForWildcardAddress(configuredInfoAddr,
authority = substituteForWildcardAddress(authority,
namenodeAddr.getHostName());
} else {
return configuredInfoAddr;
}
return URI.create(scheme + "://" + authority);
}
/**
 * Look up the configured HTTP / HTTPS address of the namenode and, when that
 * address turns out to be a wildcard / local address, replace its hostname
 * with defaultHost.
 *
 * @param defaultHost
 *          The default host name of the namenode.
 * @param conf
 *          The configuration
 * @param scheme
 *          HTTP or HTTPS
 * @return the URI built from the scheme and the (possibly substituted)
 *         authority
 * @throws IOException
 */
public static URI getInfoServerWithDefaultHost(String defaultHost,
    Configuration conf, final String scheme) throws IOException {
  // No namenode address is supplied, so only the configured value is used.
  final String configuredAuthority =
      getInfoServer(null, conf, scheme).getAuthority();
  final String effectiveAuthority =
      substituteForWildcardAddress(configuredAuthority, defaultHost);
  return URI.create(scheme + "://" + effectiveAuthority);
}
/**
 * Decide which scheme a client should use when connecting to the remote
 * server. HTTPS is chosen only when the configured policy is HTTPS_ONLY;
 * every other policy results in plain HTTP.
 *
 * @return the scheme (HTTP / HTTPS)
 */
public static String getHttpClientScheme(Configuration conf) {
  if (DFSUtil.getHttpPolicy(conf) == HttpConfig.Policy.HTTPS_ONLY) {
    return "https";
  }
  return "http";
}
/**
* Substitute a default host in the case that an address has been configured
@ -1021,8 +1054,9 @@ public static String getInfoServer(InetSocketAddress namenodeAddr,
* @return the substituted address
* @throws IOException if it is a wildcard address and security is enabled
*/
public static String substituteForWildcardAddress(String configuredAddress,
String defaultHost) throws IOException {
@VisibleForTesting
static String substituteForWildcardAddress(String configuredAddress,
String defaultHost) throws IOException {
InetSocketAddress sockAddr = NetUtils.createSocketAddr(configuredAddress);
InetSocketAddress defaultSockAddr = NetUtils.createSocketAddr(defaultHost
+ ":0");

View File

@ -125,4 +125,9 @@ public String toString() {
public DomainSocket getDomainSocket() {
return null;
}
/**
 * {@inheritDoc}
 *
 * This implementation always returns false: it never reports its channel as
 * secure.
 */
@Override
public boolean hasSecureChannel() {
return false;
}
}

View File

@ -114,4 +114,19 @@ public String toString() {
public DomainSocket getDomainSocket() {
return socket;
}
/**
 * {@inheritDoc}
 *
 * This implementation always returns true; the reasoning is given in the
 * comment inside the method body.
 */
@Override
public boolean hasSecureChannel() {
//
// Communication over domain sockets is assumed to be secure, since it
// doesn't pass over any network. We also carefully control the privileges
// that can be used on the domain socket inode and its parent directories.
// See #{java.org.apache.hadoop.net.unix.DomainSocket#validateSocketPathSecurity0}
// for details.
//
// So unless you are running as root or the hdfs superuser, you cannot
// launch a man-in-the-middle attack on UNIX domain socket traffic.
//
return true;
}
}

View File

@ -139,4 +139,9 @@ public String toString() {
public DomainSocket getDomainSocket() {
return enclosedPeer.getDomainSocket();
}
/**
 * {@inheritDoc}
 *
 * This implementation always returns true.
 * NOTE(review): presumably because this peer secures the traffic of the peer
 * it encloses; confirm against the enclosing class, which is not visible here.
 */
@Override
public boolean hasSecureChannel() {
return true;
}
}

View File

@ -128,4 +128,9 @@ public String toString() {
public DomainSocket getDomainSocket() {
return null;
}
/**
 * {@inheritDoc}
 *
 * This implementation always returns false: it never reports its channel as
 * secure.
 */
@Override
public boolean hasSecureChannel() {
return false;
}
}

View File

@ -112,4 +112,12 @@ public interface Peer extends Closeable {
* peer, or null if there is none.
*/
public DomainSocket getDomainSocket();
/**
* Return true if the channel is secure.
*
* @return True if our channel to this peer is not
* susceptible to man-in-the-middle attacks.
*/
public boolean hasSecureChannel();
}

View File

@ -162,7 +162,7 @@ public void run() {
try {
peer.setWriteTimeout(datanode.getDnConf().socketWriteTimeout);
InputStream input = socketIn;
if (dnConf.encryptDataTransfer) {
if ((!peer.hasSecureChannel()) && dnConf.encryptDataTransfer) {
IOStreamPair encryptedStreams = null;
try {
encryptedStreams = DataTransferEncryptor.getEncryptedStreams(socketOut,

View File

@ -20,6 +20,7 @@
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.SocketTimeoutException;
import java.net.URL;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
@ -79,7 +80,7 @@ public class BackupNode extends NameNode {
/** Name-node RPC address */
String nnRpcAddress;
/** Name-node HTTP address */
String nnHttpAddress;
URL nnHttpAddress;
/** Checkpoint manager */
Checkpointer checkpointManager;
@ -313,7 +314,8 @@ private NamespaceInfo handshake(Configuration conf) throws IOException {
NamenodeProtocol.class, UserGroupInformation.getCurrentUser(),
true).getProxy();
this.nnRpcAddress = NetUtils.getHostPortString(nnAddress);
this.nnHttpAddress = NetUtils.getHostPortString(super.getHttpServerAddress(conf));
this.nnHttpAddress = DFSUtil.getInfoServer(nnAddress, conf,
DFSUtil.getHttpClientScheme(conf)).toURL();
// get version and id info from the name-node
NamespaceInfo nsInfo = null;
while(!isStopRequested()) {

View File

@ -24,11 +24,14 @@
import java.io.File;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
import org.apache.hadoop.hdfs.server.protocol.CheckpointCommand;
import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand;
@ -61,6 +64,7 @@ class Checkpointer extends Daemon {
private String infoBindAddress;
private CheckpointConf checkpointConf;
private final Configuration conf;
private BackupImage getFSImage() {
return (BackupImage)backupNode.getFSImage();
@ -74,6 +78,7 @@ private NamenodeProtocol getRemoteNamenodeProxy(){
* Create a connection to the primary namenode.
*/
Checkpointer(Configuration conf, BackupNode bnNode) throws IOException {
this.conf = conf;
this.backupNode = bnNode;
try {
initialize(conf);
@ -274,10 +279,15 @@ backupNode.nnHttpAddress, getImageListenAddress(),
+ " New Image Size: " + imageSize);
}
private InetSocketAddress getImageListenAddress() {
private URL getImageListenAddress() {
InetSocketAddress httpSocAddr = backupNode.getHttpAddress();
int httpPort = httpSocAddr.getPort();
return new InetSocketAddress(infoBindAddress, httpPort);
try {
return new URL(DFSUtil.getHttpClientScheme(conf) + "://" + infoBindAddress + ":" + httpPort);
} catch (MalformedURLException e) {
// Unreachable
throw new RuntimeException(e);
}
}
static void rollForwardByApplyingLogs(

View File

@ -21,6 +21,7 @@
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
@ -41,7 +42,6 @@
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
import org.apache.hadoop.http.HttpConfig;
import org.apache.hadoop.util.StringUtils;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.map.ObjectMapper;
@ -272,12 +272,13 @@ private int getDatanodeHttpPort(Configuration conf) {
static class NamenodeMXBeanHelper {
private static final ObjectMapper mapper = new ObjectMapper();
private final String host;
private final String httpAddress;
private final URI httpAddress;
NamenodeMXBeanHelper(InetSocketAddress addr, Configuration conf)
throws IOException, MalformedObjectNameException {
this.host = addr.getHostName();
this.httpAddress = DFSUtil.getInfoServer(addr, conf, false);
this.httpAddress = DFSUtil.getInfoServer(addr, conf,
DFSUtil.getHttpClientScheme(conf));
}
/** Get the map corresponding to the JSON string */
@ -356,7 +357,7 @@ public NamenodeStatus getNamenodeStatus(String props) throws IOException,
nn.blocksCount = getProperty(props, "TotalBlocks").getLongValue();
nn.missingBlocksCount = getProperty(props, "NumberOfMissingBlocks")
.getLongValue();
nn.httpAddress = httpAddress;
nn.httpAddress = httpAddress.toURL();
getLiveNodeCount(getProperty(props, "LiveNodes").getValueAsText(), nn);
getDeadNodeCount(getProperty(props, "DeadNodes").getValueAsText(), nn);
nn.softwareVersion = getProperty(props, "SoftwareVersion").getTextValue();
@ -591,12 +592,14 @@ public void toXML(XMLOutputter doc) throws IOException {
toXmlItemBlock(doc, "Blocks", Long.toString(nn.blocksCount));
toXmlItemBlock(doc, "Missing Blocks",
Long.toString(nn.missingBlocksCount));
toXmlItemBlockWithLink(doc, nn.liveDatanodeCount + " (" +
nn.liveDecomCount + ")", nn.httpAddress+"/dfsnodelist.jsp?whatNodes=LIVE",
"Live Datanode (Decommissioned)");
toXmlItemBlockWithLink(doc, nn.deadDatanodeCount + " (" +
nn.deadDecomCount + ")", nn.httpAddress+"/dfsnodelist.jsp?whatNodes=DEAD"
, "Dead Datanode (Decommissioned)");
toXmlItemBlockWithLink(doc, nn.liveDatanodeCount + " ("
+ nn.liveDecomCount + ")", new URL(nn.httpAddress,
"/dfsnodelist.jsp?whatNodes=LIVE"),
"Live Datanode (Decommissioned)");
toXmlItemBlockWithLink(doc, nn.deadDatanodeCount + " ("
+ nn.deadDecomCount + ")", new URL(nn.httpAddress,
"/dfsnodelist.jsp?whatNodes=DEAD"),
"Dead Datanode (Decommissioned)");
toXmlItemBlock(doc, "Software Version", nn.softwareVersion);
doc.endTag(); // node
}
@ -625,7 +628,7 @@ static class NamenodeStatus {
int liveDecomCount = 0;
int deadDatanodeCount = 0;
int deadDecomCount = 0;
String httpAddress = null;
URL httpAddress = null;
String softwareVersion = "";
}
@ -763,7 +766,8 @@ public void toXML(XMLOutputter doc) throws IOException {
.equals(DecommissionStates.UNKNOWN.toString()))) {
doc.startTag("node");
// dn
toXmlItemBlockWithLink(doc, dnhost, (dnhost+":"+httpPort),"DataNode");
toXmlItemBlockWithLink(doc, dnhost, new URL("http", dnhost, httpPort,
""), "DataNode");
// overall status first
toXmlItemBlock(doc, OVERALL_STATUS, overallStatus);
@ -823,11 +827,11 @@ private static void toXmlItemBlock(XMLOutputter doc, String key, String value)
* link="http://hostname:50070" />
*/
private static void toXmlItemBlockWithLink(XMLOutputter doc, String value,
String url, String label) throws IOException {
URL url, String label) throws IOException {
doc.startTag("item");
doc.attribute("label", label);
doc.attribute("value", value);
doc.attribute("link", "///" + url);
doc.attribute("link", url.toString());
doc.endTag(); // item
}
@ -885,7 +889,7 @@ private static String readOutput(URL url) throws IOException {
return out.toString();
}
private static String queryMbean(String httpAddress, Configuration conf)
private static String queryMbean(URI httpAddress, Configuration conf)
throws IOException {
/**
* Although the other namenode might support HTTPS, it is fundamentally
@ -896,7 +900,7 @@ private static String queryMbean(String httpAddress, Configuration conf)
*
* As a result, we just hard code the connection as an HTTP connection.
*/
URL url = new URL("http://" + httpAddress + JMX_QRY);
URL url = new URL(httpAddress.toURL(), JMX_QRY);
return readOutput(url);
}
/**

View File

@ -55,7 +55,6 @@
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectoryWithSnapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeFileWithSnapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
@ -697,11 +696,9 @@ INode loadINode(final byte[] localName, boolean isSnapshotINode,
modificationTime, atime, blocks, replication, blockSize);
if (underConstruction) {
file.toUnderConstruction(clientName, clientMachine, null);
return fileDiffs == null ? file : new INodeFileWithSnapshot(file,
fileDiffs);
return fileDiffs == null ? file : new INodeFile(file, fileDiffs);
} else {
return fileDiffs == null ? file :
new INodeFileWithSnapshot(file, fileDiffs);
return fileDiffs == null ? file : new INodeFile(file, fileDiffs);
}
} else if (numBlocks == -1) {
//directory

View File

@ -79,8 +79,6 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERSIST_BLOCKS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERSIST_BLOCKS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT;
@ -154,6 +152,8 @@
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
@ -165,8 +165,6 @@
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
@ -198,7 +196,6 @@
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable.SnapshotDiffInfo;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeFileWithSnapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
@ -360,7 +357,6 @@ private void logAuditEvent(boolean succeeded,
static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100;
static int BLOCK_DELETION_INCREMENT = 1000;
private final boolean isPermissionEnabled;
private final boolean persistBlocks;
private final UserGroupInformation fsOwner;
private final String fsOwnerShortUserName;
private final String supergroup;
@ -665,13 +661,10 @@ public static FSNamesystem loadFromDisk(Configuration conf)
LOG.info("supergroup = " + supergroup);
LOG.info("isPermissionEnabled = " + isPermissionEnabled);
final boolean persistBlocks = conf.getBoolean(DFS_PERSIST_BLOCKS_KEY,
DFS_PERSIST_BLOCKS_DEFAULT);
// block allocation has to be persisted in HA using a shared edits directory
// so that the standby has up-to-date namespace information
String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf);
this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId);
this.persistBlocks = persistBlocks || (haEnabled && HAUtil.usesSharedEditsDir(conf));
// Sanity check the HA-related config.
if (nameserviceId != null) {
@ -1766,7 +1759,7 @@ private void concatInternal(FSPermissionChecker pc, String target,
throw new HadoopIllegalArgumentException("concat: target file "
+ target + " is empty");
}
if (trgInode instanceof INodeFileWithSnapshot) {
if (trgInode.isWithSnapshot()) {
throw new HadoopIllegalArgumentException("concat: target file "
+ target + " is in a snapshot");
}
@ -2630,9 +2623,7 @@ LocatedBlock getAdditionalBlock(String src, long fileId, String clientName,
} finally {
writeUnlock();
}
if (persistBlocks) {
getEditLog().logSync();
}
getEditLog().logSync();
// Return located block
return makeLocatedBlock(newBlock, targets, offset);
@ -2818,9 +2809,7 @@ boolean abandonBlock(ExtendedBlock b, String src, String holder)
} finally {
writeUnlock();
}
if (persistBlocks) {
getEditLog().logSync();
}
getEditLog().logSync();
return true;
}

View File

@ -21,6 +21,7 @@
import java.util.*;
import java.io.*;
import java.net.InetSocketAddress;
import java.net.URL;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
@ -31,10 +32,8 @@
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSUtil;
@ -87,8 +86,8 @@ public void doGet(final HttpServletRequest request,
ServletContext context = getServletContext();
final FSImage nnImage = NameNodeHttpServer.getFsImageFromContext(context);
final GetImageParams parsedParams = new GetImageParams(request, response);
final Configuration conf =
(Configuration)getServletContext().getAttribute(JspHelper.CURRENT_CONF);
final Configuration conf = (Configuration) context
.getAttribute(JspHelper.CURRENT_CONF);
if (UserGroupInformation.isSecurityEnabled() &&
!isValidRequestor(context, request.getUserPrincipal().getName(), conf)) {
@ -163,7 +162,7 @@ public Void run() throws Exception {
// issue a HTTP get request to download the new fsimage
MD5Hash downloadImageDigest =
TransferFsImage.downloadImageToStorage(
parsedParams.getInfoServer(), txid,
parsedParams.getInfoServer(conf), txid,
nnImage.getStorage(), true);
nnImage.saveDigestAndRenameCheckpointImage(txid, downloadImageDigest);
@ -309,7 +308,9 @@ static String getParamStringForLog(RemoteEditLog log,
}
static String getParamStringToPutImage(long txid,
InetSocketAddress imageListenAddress, Storage storage) {
URL url, Storage storage) {
InetSocketAddress imageListenAddress = NetUtils.createSocketAddr(url
.getAuthority());
String machine = !imageListenAddress.isUnresolved()
&& imageListenAddress.getAddress().isAnyLocalAddress() ? null
: imageListenAddress.getHostName();
@ -419,11 +420,11 @@ boolean isPutImage() {
return isPutImage;
}
String getInfoServer() throws IOException{
URL getInfoServer(Configuration conf) throws IOException {
if (machineName == null || remoteport == 0) {
throw new IOException ("MachineName and port undefined");
throw new IOException("MachineName and port undefined");
}
return machineName + ":" + remoteport;
return new URL(DFSUtil.getHttpClientScheme(conf), machineName, remoteport, "");
}
boolean shouldFetchLatest() {

View File

@ -34,7 +34,6 @@
import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithCount;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectoryWithSnapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeFileWithSnapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.apache.hadoop.hdfs.util.ReadOnlyList;
@ -322,23 +321,6 @@ INodeReference.WithName replaceChild4ReferenceWithName(INode oldChild,
replaceChild(oldChild, ref, null);
return ref;
}
private void replaceChildFile(final INodeFile oldChild,
final INodeFile newChild, final INodeMap inodeMap) {
replaceChild(oldChild, newChild, inodeMap);
oldChild.clear();
newChild.updateBlockCollection();
}
/** Replace a child {@link INodeFile} with an {@link INodeFileWithSnapshot}. */
INodeFileWithSnapshot replaceChild4INodeFileWithSnapshot(
final INodeFile child, final INodeMap inodeMap) {
Preconditions.checkArgument(!(child instanceof INodeFileWithSnapshot),
"Child file is already an INodeFileWithSnapshot, child=" + child);
final INodeFileWithSnapshot newChild = new INodeFileWithSnapshot(child);
replaceChildFile(child, newChild, inodeMap);
return newChild;
}
@Override
public INodeDirectory recordModification(Snapshot latest,

View File

@ -31,7 +31,7 @@
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiff;
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeFileWithSnapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileWithSnapshotFeature;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import com.google.common.annotations.VisibleForTesting;
@ -137,24 +137,11 @@ public INodeFile(INodeFile that) {
this.blocks = that.blocks;
this.headFeature = that.headFeature;
}
/**
* If the inode contains a {@link FileUnderConstructionFeature}, return it;
* otherwise, return null.
*/
public final FileUnderConstructionFeature getFileUnderConstructionFeature() {
for (Feature f = this.headFeature; f != null; f = f.nextFeature) {
if (f instanceof FileUnderConstructionFeature) {
return (FileUnderConstructionFeature) f;
}
}
return null;
}
/** Is this file under construction? */
@Override // BlockCollection
public boolean isUnderConstruction() {
return getFileUnderConstructionFeature() != null;
public INodeFile(INodeFile that, FileDiffList diffs) {
this(that);
Preconditions.checkArgument(!that.isWithSnapshot());
this.addSnapshotFeature(diffs);
}
private void addFeature(Feature f) {
@ -179,6 +166,25 @@ public final INodeFile asFile() {
/* Start of Under-Construction Feature */
/**
 * Walk the feature list, starting at {@code headFeature}, looking for the
 * under-construction feature.
 *
 * @return the first {@link FileUnderConstructionFeature} in the list, or
 *         null if the list contains none.
 */
public final FileUnderConstructionFeature getFileUnderConstructionFeature() {
  Feature current = this.headFeature;
  while (current != null) {
    if (current instanceof FileUnderConstructionFeature) {
      return (FileUnderConstructionFeature) current;
    }
    current = current.nextFeature;
  }
  return null;
}
/**
 * @return true iff {@link #getFileUnderConstructionFeature()} finds a
 *         feature on this file.
 */
@Override // BlockCollection
public boolean isUnderConstruction() {
  final FileUnderConstructionFeature uc = getFileUnderConstructionFeature();
  return uc != null;
}
/** Convert this file to an {@link INodeFileUnderConstruction}. */
INodeFile toUnderConstruction(String clientName, String clientMachine,
DatanodeDescriptor clientNode) {
@ -263,24 +269,75 @@ boolean removeLastBlock(Block oldblock) {
}
/* End of Under-Construction Feature */
/* Start of Snapshot Feature */
/**
 * Create a {@link FileWithSnapshotFeature} from the given diff list and
 * register it on this file through {@code addFeature}.
 *
 * @param diffs the diff list handed to the feature's constructor
 * @return the newly created feature
 */
private FileWithSnapshotFeature addSnapshotFeature(FileDiffList diffs) {
  final FileWithSnapshotFeature feature = new FileWithSnapshotFeature(diffs);
  addFeature(feature);
  return feature;
}
/**
 * Walk the feature list, starting at {@code headFeature}, looking for the
 * snapshot feature.
 *
 * @return the first {@link FileWithSnapshotFeature} in the list, or null
 *         if the list contains none.
 */
public final FileWithSnapshotFeature getFileWithSnapshotFeature() {
  Feature current = headFeature;
  while (current != null) {
    if (current instanceof FileWithSnapshotFeature) {
      return (FileWithSnapshotFeature) current;
    }
    current = current.nextFeature;
  }
  return null;
}
/** Does this file have the snapshot feature? */
public final boolean isWithSnapshot() {
return getFileWithSnapshotFeature() != null;
}
/**
 * @return the superclass detail string, followed by the snapshot feature's
 *         detailed string when this file has a snapshot feature.
 */
@Override
public String toDetailString() {
  final FileWithSnapshotFeature feature = this.getFileWithSnapshotFeature();
  final String base = super.toDetailString();
  final String suffix;
  if (feature == null) {
    suffix = "";
  } else {
    suffix = feature.getDetailedString();
  }
  return base + suffix;
}
@Override
public INodeFileAttributes getSnapshotINode(final Snapshot snapshot) {
return this;
FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
if (sf != null) {
return sf.getSnapshotINode(this, snapshot);
} else {
return this;
}
}
@Override
public INodeFile recordModification(final Snapshot latest,
final INodeMap inodeMap) throws QuotaExceededException {
if (isInLatestSnapshot(latest)) {
INodeFileWithSnapshot newFile = getParent()
.replaceChild4INodeFileWithSnapshot(this, inodeMap)
.recordModification(latest, inodeMap);
return newFile;
} else {
return this;
// the file is in snapshot, create a snapshot feature if it does not have one
FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
if (sf == null) {
sf = addSnapshotFeature(null);
}
// record self in the diff list if necessary
if (!shouldRecordInSrcSnapshot(latest)) {
sf.getDiffs().saveSelf2Snapshot(latest, this, null);
}
}
return this;
}
/**
 * @return the diff list of this file's snapshot feature, or null when the
 *         file has no snapshot feature.
 */
public FileDiffList getDiffs() {
  final FileWithSnapshotFeature feature = this.getFileWithSnapshotFeature();
  return feature == null ? null : feature.getDiffs();
}
/* End of Snapshot Feature */
/** @return the replication factor of the file. */
public final short getFileReplication(Snapshot snapshot) {
@ -292,14 +349,23 @@ public final short getFileReplication(Snapshot snapshot) {
}
/** The same as getFileReplication(null). */
@Override
@Override // INodeFileAttributes
public final short getFileReplication() {
return getFileReplication(null);
}
@Override
@Override // BlockCollection
public short getBlockReplication() {
return getFileReplication(null);
short max = getFileReplication(null);
FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
if (sf != null) {
short maxInSnapshot = sf.getMaxBlockRepInDiffs();
if (sf.isCurrentFileDeleted()) {
return maxInSnapshot;
}
max = maxInSnapshot > max ? maxInSnapshot : max;
}
return max;
}
/** Set the replication factor of this file. */
@ -392,12 +458,20 @@ public Quota.Counts cleanSubtree(final Snapshot snapshot, Snapshot prior,
final BlocksMapUpdateInfo collectedBlocks,
final List<INode> removedINodes, final boolean countDiffChange)
throws QuotaExceededException {
FileWithSnapshotFeature sf = getFileWithSnapshotFeature();
if (sf != null) {
return sf.cleanFile(this, snapshot, prior, collectedBlocks,
removedINodes, countDiffChange);
}
Quota.Counts counts = Quota.Counts.newInstance();
if (snapshot == null && prior == null) {
// this only happens when deleting the current file
if (snapshot == null && prior == null) {
// this only happens when deleting the current file and the file is not
// in any snapshot
computeQuotaUsage(counts, false);
destroyAndCollectBlocks(collectedBlocks, removedINodes);
} else if (snapshot == null && prior != null) {
// when deleting the current file and the file is in snapshot, we should
// clean the 0-sized block if the file is UC
FileUnderConstructionFeature uc = getFileUnderConstructionFeature();
if (uc != null) {
uc.cleanZeroSizeBlock(this, collectedBlocks);
@ -419,8 +493,9 @@ public void destroyAndCollectBlocks(BlocksMapUpdateInfo collectedBlocks,
clear();
removedINodes.add(this);
if (this instanceof INodeFileWithSnapshot) {
((INodeFileWithSnapshot) this).getDiffs().clear();
FileWithSnapshotFeature sf = getFileWithSnapshotFeature();
if (sf != null) {
sf.clearDiffs();
}
}
@ -435,8 +510,9 @@ public final Quota.Counts computeQuotaUsage(Quota.Counts counts,
boolean useCache, int lastSnapshotId) {
long nsDelta = 1;
final long dsDelta;
if (this instanceof INodeFileWithSnapshot) {
FileDiffList fileDiffList = ((INodeFileWithSnapshot) this).getDiffs();
FileWithSnapshotFeature sf = getFileWithSnapshotFeature();
if (sf != null) {
FileDiffList fileDiffList = sf.getDiffs();
Snapshot last = fileDiffList.getLastSnapshot();
List<FileDiff> diffs = fileDiffList.asList();
@ -468,16 +544,16 @@ public final ContentSummaryComputationContext computeContentSummary(
private void computeContentSummary4Snapshot(final Content.Counts counts) {
// file length and diskspace only counted for the latest state of the file
// i.e. either the current state or the last snapshot
if (this instanceof INodeFileWithSnapshot) {
final INodeFileWithSnapshot withSnapshot = (INodeFileWithSnapshot) this;
final FileDiffList diffs = withSnapshot.getDiffs();
FileWithSnapshotFeature sf = getFileWithSnapshotFeature();
if (sf != null) {
final FileDiffList diffs = sf.getDiffs();
final int n = diffs.asList().size();
counts.add(Content.FILE, n);
if (n > 0 && withSnapshot.isCurrentFileDeleted()) {
if (n > 0 && sf.isCurrentFileDeleted()) {
counts.add(Content.LENGTH, diffs.getLast().getFileSize());
}
if (withSnapshot.isCurrentFileDeleted()) {
if (sf.isCurrentFileDeleted()) {
final long lastFileSize = diffs.getLast().getFileSize();
counts.add(Content.DISKSPACE, lastFileSize * getBlockReplication());
}
@ -485,8 +561,8 @@ private void computeContentSummary4Snapshot(final Content.Counts counts) {
}
private void computeContentSummary4Current(final Content.Counts counts) {
if (this instanceof INodeFileWithSnapshot
&& ((INodeFileWithSnapshot) this).isCurrentFileDeleted()) {
FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
if (sf != null && sf.isCurrentFileDeleted()) {
return;
}
@ -505,8 +581,9 @@ public final long computeFileSize() {
* otherwise, get the file size from the given snapshot.
*/
public final long computeFileSize(Snapshot snapshot) {
if (snapshot != null && this instanceof INodeFileWithSnapshot) {
final FileDiff d = ((INodeFileWithSnapshot) this).getDiffs().getDiff(
FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
if (snapshot != null && sf != null) {
final FileDiff d = sf.getDiffs().getDiff(
snapshot);
if (d != null) {
return d.getFileSize();

View File

@ -27,7 +27,6 @@
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectoryWithSnapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeFileWithSnapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import com.google.common.base.Preconditions;
@ -102,9 +101,8 @@ static Snapshot getPriorSnapshot(INodeReference ref) {
}
if (wn != null) {
INode referred = wc.getReferredINode();
if (referred instanceof INodeFileWithSnapshot) {
return ((INodeFileWithSnapshot) referred).getDiffs().getPrior(
wn.lastSnapshotId);
if (referred.isFile() && referred.asFile().isWithSnapshot()) {
return referred.asFile().getDiffs().getPrior(wn.lastSnapshotId);
} else if (referred instanceof INodeDirectoryWithSnapshot) {
return ((INodeDirectoryWithSnapshot) referred).getDiffs().getPrior(
wn.lastSnapshotId);
@ -547,9 +545,8 @@ public void destroyAndCollectBlocks(BlocksMapUpdateInfo collectedBlocks,
private Snapshot getSelfSnapshot() {
INode referred = getReferredINode().asReference().getReferredINode();
Snapshot snapshot = null;
if (referred instanceof INodeFileWithSnapshot) {
snapshot = ((INodeFileWithSnapshot) referred).getDiffs().getPrior(
lastSnapshotId);
if (referred.isFile() && referred.asFile().isWithSnapshot()) {
snapshot = referred.asFile().getDiffs().getPrior(lastSnapshotId);
} else if (referred instanceof INodeDirectoryWithSnapshot) {
snapshot = ((INodeDirectoryWithSnapshot) referred).getDiffs().getPrior(
lastSnapshotId);
@ -637,12 +634,12 @@ public void destroyAndCollectBlocks(
Snapshot snapshot = getSelfSnapshot(prior);
INode referred = getReferredINode().asReference().getReferredINode();
if (referred instanceof INodeFileWithSnapshot) {
// if referred is a file, it must be a FileWithSnapshot since we did
if (referred.isFile() && referred.asFile().isWithSnapshot()) {
// if referred is a file, it must be a file with Snapshot since we did
// recordModification before the rename
INodeFileWithSnapshot sfile = (INodeFileWithSnapshot) referred;
INodeFile file = referred.asFile();
// make sure we mark the file as deleted
sfile.deleteCurrentFile();
file.getFileWithSnapshotFeature().deleteCurrentFile();
try {
// when calling cleanSubtree of the referred node, since we
// compute quota usage updates before calling this destroy
@ -671,9 +668,8 @@ private Snapshot getSelfSnapshot(final Snapshot prior) {
WithCount wc = (WithCount) getReferredINode().asReference();
INode referred = wc.getReferredINode();
Snapshot lastSnapshot = null;
if (referred instanceof INodeFileWithSnapshot) {
lastSnapshot = ((INodeFileWithSnapshot) referred).getDiffs()
.getLastSnapshot();
if (referred.isFile() && referred.asFile().isWithSnapshot()) {
lastSnapshot = referred.asFile().getDiffs().getLastSnapshot();
} else if (referred instanceof INodeDirectoryWithSnapshot) {
lastSnapshot = ((INodeDirectoryWithSnapshot) referred)
.getLastSnapshot();

View File

@ -39,6 +39,7 @@
import org.apache.hadoop.hdfs.web.AuthFilter;
import org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.hdfs.web.resources.Param;
import org.apache.hadoop.hdfs.web.resources.UserParam;
import org.apache.hadoop.http.HttpConfig;
import org.apache.hadoop.http.HttpServer;
import org.apache.hadoop.net.NetUtils;
@ -73,7 +74,10 @@ public class NameNodeHttpServer {
private void initWebHdfs(Configuration conf) throws IOException {
if (WebHdfsFileSystem.isEnabled(conf, HttpServer.LOG)) {
//add SPNEGO authentication filter for webhdfs
// set user pattern based on configuration file
UserParam.setUserPattern(conf.get(DFSConfigKeys.DFS_WEBHDFS_USER_PATTERN_KEY, DFSConfigKeys.DFS_WEBHDFS_USER_PATTERN_DEFAULT));
// add SPNEGO authentication filter for webhdfs
final String name = "SPNEGO";
final String classname = AuthFilter.class.getName();
final String pathSpec = WebHdfsFileSystem.PATH_PREFIX + "/*";

View File

@ -29,7 +29,9 @@
import java.io.FilenameFilter;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.security.PrivilegedAction;
import java.security.PrivilegedExceptionAction;
import java.util.Collection;
@ -111,7 +113,7 @@ public class SecondaryNameNode implements Runnable {
private final long starttime = Time.now();
private volatile long lastCheckpointTime = 0;
private String fsName;
private URL fsName;
private CheckpointStorage checkpointImage;
private NamenodeProtocol namenode;
@ -404,7 +406,7 @@ public void doWork() {
* @throws IOException
*/
static boolean downloadCheckpointFiles(
final String nnHostPort,
final URL nnHostPort,
final FSImage dstImage,
final CheckpointSignature sig,
final RemoteEditLogManifest manifest
@ -467,25 +469,33 @@ InetSocketAddress getNameNodeAddress() {
/**
* Returns the Jetty server that the Namenode is listening on.
*/
private String getInfoServer() throws IOException {
private URL getInfoServer() throws IOException {
URI fsName = FileSystem.getDefaultUri(conf);
if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(fsName.getScheme())) {
throw new IOException("This is not a DFS");
}
String configuredAddress = DFSUtil.getInfoServer(null, conf, false);
String address = DFSUtil.substituteForWildcardAddress(configuredAddress,
fsName.getHost());
LOG.debug("Will connect to NameNode at HTTP address: " + address);
return address;
final String scheme = DFSUtil.getHttpClientScheme(conf);
URI address = DFSUtil.getInfoServerWithDefaultHost(fsName.getHost(), conf,
scheme);
LOG.debug("Will connect to NameNode at " + address);
return address.toURL();
}
/**
* Return the host:port of where this SecondaryNameNode is listening
* for image transfers
*/
private InetSocketAddress getImageListenAddress() {
return new InetSocketAddress(infoBindAddress, infoPort);
private URL getImageListenAddress() {
  // Assemble "<scheme>://<bind address>:<port>" and parse it as a URL.
  final String spec = DFSUtil.getHttpClientScheme(conf) + "://"
      + infoBindAddress + ":" + infoPort;
  try {
    return new URL(spec);
  } catch (MalformedURLException e) {
    // Treated as unreachable; rethrow unchecked if it ever happens.
    throw new RuntimeException(e);
  }
}
/**

View File

@ -17,13 +17,18 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
import java.io.*;
import java.net.*;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.URL;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.List;
import java.lang.Math;
import javax.servlet.ServletOutputStream;
import javax.servlet.ServletResponse;
@ -41,14 +46,16 @@
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.StorageErrorReporter;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.util.Time;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
@ -76,15 +83,15 @@ public class TransferFsImage {
private static final Log LOG = LogFactory.getLog(TransferFsImage.class);
public static void downloadMostRecentImageToDirectory(String fsName,
public static void downloadMostRecentImageToDirectory(URL infoServer,
File dir) throws IOException {
String fileId = GetImageServlet.getParamStringForMostRecentImage();
getFileClient(fsName, fileId, Lists.newArrayList(dir),
getFileClient(infoServer, fileId, Lists.newArrayList(dir),
null, false);
}
public static MD5Hash downloadImageToStorage(
String fsName, long imageTxId, Storage dstStorage, boolean needDigest)
URL fsName, long imageTxId, Storage dstStorage, boolean needDigest)
throws IOException {
String fileid = GetImageServlet.getParamStringForImage(
imageTxId, dstStorage);
@ -102,7 +109,7 @@ public static MD5Hash downloadImageToStorage(
return hash;
}
static void downloadEditsToStorage(String fsName, RemoteEditLog log,
static void downloadEditsToStorage(URL fsName, RemoteEditLog log,
NNStorage dstStorage) throws IOException {
assert log.getStartTxId() > 0 && log.getEndTxId() > 0 :
"bad log: " + log;
@ -156,17 +163,17 @@ static void downloadEditsToStorage(String fsName, RemoteEditLog log,
* Requests that the NameNode download an image from this node.
*
* @param fsName the http address for the remote NN
* @param imageListenAddress the host/port where the local node is running an
* @param myNNAddress the host/port where the local node is running an
* HTTPServer hosting GetImageServlet
* @param storage the storage directory to transfer the image from
* @param txid the transaction ID of the image to be uploaded
*/
public static void uploadImageFromStorage(String fsName,
InetSocketAddress imageListenAddress,
public static void uploadImageFromStorage(URL fsName,
URL myNNAddress,
Storage storage, long txid) throws IOException {
String fileid = GetImageServlet.getParamStringToPutImage(
txid, imageListenAddress, storage);
txid, myNNAddress, storage);
// this doesn't directly upload an image, but rather asks the NN
// to connect back to the 2NN to download the specified image.
try {
@ -244,17 +251,11 @@ public static void getFileServer(ServletResponse response, File localfile,
* this storage object will be notified.
* @return a digest of the received file if getChecksum is true
*/
static MD5Hash getFileClient(String nnHostPort,
static MD5Hash getFileClient(URL infoServer,
String queryString, List<File> localPaths,
Storage dstStorage, boolean getChecksum) throws IOException {
String str = HttpConfig.getSchemePrefix() + nnHostPort + "/getimage?" +
queryString;
LOG.info("Opening connection to " + str);
//
// open connection to remote server
//
URL url = new URL(str);
URL url = new URL(infoServer, "/getimage?" + queryString);
LOG.info("Opening connection to " + url);
return doGetUrl(url, localPaths, dstStorage, getChecksum);
}

View File

@ -23,6 +23,7 @@
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URL;
import java.security.PrivilegedAction;
import java.util.Collection;
import java.util.List;
@ -69,7 +70,7 @@ public class BootstrapStandby implements Tool, Configurable {
private String nnId;
private String otherNNId;
private String otherHttpAddr;
private URL otherHttpAddr;
private InetSocketAddress otherIpcAddr;
private Collection<URI> dirsToFormat;
private List<URI> editUrisToFormat;
@ -179,6 +180,7 @@ private int doRun() throws IOException {
// Check with the user before blowing away data.
if (!Storage.confirmFormat(storage.dirIterable(null),
force, interactive)) {
storage.close();
return ERR_CODE_ALREADY_FORMATTED;
}
@ -203,7 +205,7 @@ private int doRun() throws IOException {
// Download that checkpoint into our storage directories.
MD5Hash hash = TransferFsImage.downloadImageToStorage(
otherHttpAddr.toString(), imageTxId,
otherHttpAddr, imageTxId,
storage, true);
image.saveDigestAndRenameCheckpointImage(imageTxId, hash);
return 0;
@ -276,11 +278,10 @@ private void parseConfAndFindOtherNN() throws IOException {
"Could not determine valid IPC address for other NameNode (%s)" +
", got: %s", otherNNId, otherIpcAddr);
otherHttpAddr = DFSUtil.getInfoServer(null, otherNode, false);
otherHttpAddr = DFSUtil.substituteForWildcardAddress(otherHttpAddr,
otherIpcAddr.getHostName());
final String scheme = DFSUtil.getHttpClientScheme(conf);
otherHttpAddr = DFSUtil.getInfoServerWithDefaultHost(
otherIpcAddr.getHostName(), otherNode, scheme).toURL();
dirsToFormat = FSNamesystem.getNamespaceDirs(conf);
editUrisToFormat = FSNamesystem.getNamespaceEditsDirs(
conf, false);

View File

@ -20,7 +20,8 @@
import static org.apache.hadoop.util.Time.now;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URL;
import java.security.PrivilegedAction;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
@ -43,7 +44,6 @@
import org.apache.hadoop.hdfs.server.namenode.SaveNamespaceCancelledException;
import org.apache.hadoop.hdfs.server.namenode.TransferFsImage;
import org.apache.hadoop.hdfs.util.Canceler;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
@ -66,8 +66,8 @@ public class StandbyCheckpointer {
private long lastCheckpointTime;
private final CheckpointerThread thread;
private final ThreadFactory uploadThreadFactory;
private String activeNNAddress;
private InetSocketAddress myNNAddress;
private URL activeNNAddress;
private URL myNNAddress;
private Object cancelLock = new Object();
private Canceler canceler;
@ -94,7 +94,7 @@ public StandbyCheckpointer(Configuration conf, FSNamesystem ns)
*/
private void setNameNodeAddresses(Configuration conf) throws IOException {
// Look up our own address.
String myAddrString = getHttpAddress(conf);
myNNAddress = getHttpAddress(conf);
// Look up the active node's address
Configuration confForActive = HAUtil.getConfForOtherNode(conf);
@ -103,32 +103,22 @@ private void setNameNodeAddresses(Configuration conf) throws IOException {
// Sanity-check.
Preconditions.checkArgument(checkAddress(activeNNAddress),
"Bad address for active NN: %s", activeNNAddress);
Preconditions.checkArgument(checkAddress(myAddrString),
"Bad address for standby NN: %s", myAddrString);
myNNAddress = NetUtils.createSocketAddr(myAddrString);
Preconditions.checkArgument(checkAddress(myNNAddress),
"Bad address for standby NN: %s", myNNAddress);
}
private String getHttpAddress(Configuration conf) throws IOException {
String configuredAddr = DFSUtil.getInfoServer(null, conf, false);
// Use the hostname from the RPC address as a default, in case
// the HTTP address is configured to 0.0.0.0.
String hostnameFromRpc = NameNode.getServiceAddress(
conf, true).getHostName();
try {
return DFSUtil.substituteForWildcardAddress(
configuredAddr, hostnameFromRpc);
} catch (IOException e) {
throw new IllegalArgumentException(e);
}
private URL getHttpAddress(Configuration conf) throws IOException {
  final String httpScheme = DFSUtil.getHttpClientScheme(conf);
  // The hostname of the service address is handed over as the default host.
  final String fallbackHost =
      NameNode.getServiceAddress(conf, true).getHostName();
  return DFSUtil.getInfoServerWithDefaultHost(fallbackHost, conf, httpScheme)
      .toURL();
}
/**
* Ensure that the given address is valid and has a port
* specified.
*/
private boolean checkAddress(String addrStr) {
InetSocketAddress addr = NetUtils.createSocketAddr(addrStr);
private static boolean checkAddress(URL addr) {
  // URL#getPort() returns -1 (not 0) when the URL carries no explicit port,
  // so a "!= 0" test would accept a port-less address. Require a positive
  // port so that both "no port" (-1) and port 0 are rejected.
  return addr.getPort() > 0;
}
@ -344,7 +334,7 @@ private void doWork() {
}
@VisibleForTesting
String getActiveNNAddress() {
URL getActiveNNAddress() {
return activeNNAddress;
}
}

View File

@ -33,7 +33,7 @@
* The difference of an {@link INodeFile} between two snapshots.
*/
public class FileDiff extends
AbstractINodeDiff<INodeFileWithSnapshot, INodeFileAttributes, FileDiff> {
AbstractINodeDiff<INodeFile, INodeFileAttributes, FileDiff> {
/** The file size at snapshot creation time. */
private final long fileSize;
@ -56,11 +56,12 @@ public long getFileSize() {
}
@Override
Quota.Counts combinePosteriorAndCollectBlocks(
INodeFileWithSnapshot currentINode, FileDiff posterior,
BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes) {
return currentINode.updateQuotaAndCollectBlocks(posterior, collectedBlocks,
removedINodes);
Quota.Counts combinePosteriorAndCollectBlocks(INodeFile currentINode,
FileDiff posterior, BlocksMapUpdateInfo collectedBlocks,
final List<INode> removedINodes) {
return currentINode.getFileWithSnapshotFeature()
.updateQuotaAndCollectBlocks(currentINode, posterior, collectedBlocks,
removedINodes);
}
@Override
@ -84,9 +85,10 @@ void write(DataOutput out, ReferenceMap referenceMap) throws IOException {
}
@Override
Quota.Counts destroyDiffAndCollectBlocks(INodeFileWithSnapshot currentINode,
Quota.Counts destroyDiffAndCollectBlocks(INodeFile currentINode,
BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes) {
return currentINode.updateQuotaAndCollectBlocks(this, collectedBlocks,
removedINodes);
return currentINode.getFileWithSnapshotFeature()
.updateQuotaAndCollectBlocks(currentINode, this, collectedBlocks,
removedINodes);
}
}

View File

@ -17,19 +17,20 @@
*/
package org.apache.hadoop.hdfs.server.namenode.snapshot;
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
import org.apache.hadoop.hdfs.server.namenode.INodeFileAttributes;
/** A list of FileDiffs for storing snapshot data. */
public class FileDiffList extends
AbstractINodeDiffList<INodeFileWithSnapshot, INodeFileAttributes, FileDiff> {
AbstractINodeDiffList<INodeFile, INodeFileAttributes, FileDiff> {
@Override
FileDiff createDiff(Snapshot snapshot, INodeFileWithSnapshot file) {
FileDiff createDiff(Snapshot snapshot, INodeFile file) {
return new FileDiff(snapshot, file);
}
@Override
INodeFileAttributes createSnapshotCopy(INodeFileWithSnapshot currentINode) {
INodeFileAttributes createSnapshotCopy(INodeFile currentINode) {
return new INodeFileAttributes.SnapshotCopy(currentINode);
}
}

View File

@ -23,90 +23,51 @@
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.namenode.INode;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
import org.apache.hadoop.hdfs.server.namenode.INodeFileAttributes;
import org.apache.hadoop.hdfs.server.namenode.INodeMap;
import org.apache.hadoop.hdfs.server.namenode.Quota;
/**
* Represent an {@link INodeFile} that is snapshotted.
* Feature for file with snapshot-related information.
*/
@InterfaceAudience.Private
public class INodeFileWithSnapshot extends INodeFile {
public class FileWithSnapshotFeature extends INodeFile.Feature {
private final FileDiffList diffs;
private boolean isCurrentFileDeleted = false;
public INodeFileWithSnapshot(INodeFile f) {
this(f, f instanceof INodeFileWithSnapshot ?
((INodeFileWithSnapshot) f).getDiffs() : null);
}
public INodeFileWithSnapshot(INodeFile f, FileDiffList diffs) {
super(f);
public FileWithSnapshotFeature(FileDiffList diffs) {
this.diffs = diffs != null? diffs: new FileDiffList();
}
/** @return the current value of the {@code isCurrentFileDeleted} flag. */
public boolean isCurrentFileDeleted() {
  return this.isCurrentFileDeleted;
}
/** Delete the file from the current tree */
/**
 * Mark the file as deleted from the current tree by setting
 * {@link #isCurrentFileDeleted} to true.
 *
 * A file with the snapshot feature can be in one of two states:
 * 1) it is still in the current file directory and was modified while
 *    included in some snapshot;
 * 2) it is no longer in the current file directory (deleted) but is still
 *    in some snapshot, so its inode is kept.
 * The flag set here is what distinguishes state 2) from state 1).
 */
public void deleteCurrentFile() {
  this.isCurrentFileDeleted = true;
}
@Override
public INodeFileAttributes getSnapshotINode(Snapshot snapshot) {
return diffs.getSnapshotINode(snapshot, this);
public INodeFileAttributes getSnapshotINode(INodeFile f, Snapshot snapshot) {
return diffs.getSnapshotINode(snapshot, f);
}
@Override
public INodeFileWithSnapshot recordModification(final Snapshot latest,
final INodeMap inodeMap) throws QuotaExceededException {
if (isInLatestSnapshot(latest) && !shouldRecordInSrcSnapshot(latest)) {
diffs.saveSelf2Snapshot(latest, this, null);
}
return this;
}
/** @return the {@link FileDiffList} held by this feature. */
public FileDiffList getDiffs() {
  return this.diffs;
}
@Override
public Quota.Counts cleanSubtree(final Snapshot snapshot, Snapshot prior,
final BlocksMapUpdateInfo collectedBlocks,
final List<INode> removedINodes, final boolean countDiffChange)
throws QuotaExceededException {
if (snapshot == null) { // delete the current file
if (!isCurrentFileDeleted()) {
recordModification(prior, null);
deleteCurrentFile();
}
this.collectBlocksAndClear(collectedBlocks, removedINodes);
return Quota.Counts.newInstance();
} else { // delete a snapshot
prior = getDiffs().updatePrior(snapshot, prior);
return diffs.deleteSnapshotDiff(snapshot, prior, this, collectedBlocks,
removedINodes, countDiffChange);
}
}
@Override
public String toDetailString() {
return super.toDetailString()
+ (isCurrentFileDeleted()? "(DELETED), ": ", ") + diffs;
}
/**
* @return block replication, which is the max file replication among
* the file and the diff list.
*/
@Override
public short getBlockReplication() {
short max = isCurrentFileDeleted() ? 0 : getFileReplication();
/** @return the max replication factor in diffs */
public short getMaxBlockRepInDiffs() {
short max = 0;
for(FileDiff d : getDiffs()) {
if (d.snapshotINode != null) {
final short replication = d.snapshotINode.getFileReplication();
@ -118,33 +79,79 @@ public short getBlockReplication() {
return max;
}
/**
 * @return the diff list rendered as a string, prefixed with
 *         "(DELETED), " when the current file is deleted and ", " otherwise.
 */
public String getDetailedString() {
  final String prefix;
  if (isCurrentFileDeleted()) {
    prefix = "(DELETED), ";
  } else {
    prefix = ", ";
  }
  return prefix + diffs;
}
/**
 * Clean up the given file either for deletion of a snapshot or for
 * deletion of the current file.
 *
 * @param file the file owning this feature
 * @param snapshot the snapshot being deleted; null means the current file
 *        is being deleted
 * @param prior the prior snapshot
 * @param collectedBlocks collector passed on to the block cleanup
 * @param removedINodes collector passed on to the block cleanup
 * @param countDiffChange forwarded to the diff list when deleting a snapshot
 * @return the counts returned by the diff list when a snapshot is deleted,
 *         or a fresh {@link Quota.Counts} instance when the current file is
 *         deleted
 */
public Quota.Counts cleanFile(final INodeFile file, final Snapshot snapshot,
    Snapshot prior, final BlocksMapUpdateInfo collectedBlocks,
    final List<INode> removedINodes, final boolean countDiffChange)
    throws QuotaExceededException {
  if (snapshot != null) {
    // delete the snapshot
    prior = getDiffs().updatePrior(snapshot, prior);
    return diffs.deleteSnapshotDiff(snapshot, prior, file, collectedBlocks,
        removedINodes, countDiffChange);
  }

  // delete the current file while the file has snapshot feature
  final boolean alreadyDeleted = isCurrentFileDeleted();
  if (!alreadyDeleted) {
    file.recordModification(prior, null);
    deleteCurrentFile();
  }
  collectBlocksAndClear(file, collectedBlocks, removedINodes);
  return Quota.Counts.newInstance();
}
/** Clear the diff list held by this feature. */
public void clearDiffs() {
  diffs.clear();
}
/**
 * Collects the blocks of {@code file} that are no longer needed after
 * {@code removed} has been dropped, and reports how much disk space that
 * released.
 *
 * The "before" figure starts as {@code file.diskspaceConsumed()}. When the
 * removed diff carries a snapshot inode, it is re-derived from that inode's
 * replication if the file's block replication is 0 or lower than the
 * snapshot's replication.
 *
 * @param file the file whose blocks are examined
 * @param removed the diff that was removed
 * @param collectedBlocks receives the blocks that can be removed
 * @param removedINodes receives the inodes that can be removed
 * @return counts created with 0 and the disk-space delta (before minus
 *         after) as arguments
 */
public Quota.Counts updateQuotaAndCollectBlocks(INodeFile file,
    FileDiff removed, BlocksMapUpdateInfo collectedBlocks,
    final List<INode> removedINodes) {
  long dsBefore = file.diskspaceConsumed();
  if (removed.snapshotINode != null) {
    final short removedRepl = removed.snapshotINode.getFileReplication();
    final short liveRepl = file.getBlockReplication();
    if (liveRepl == 0) {
      // no replication reported for the file: size it with the removed
      // snapshot's replication instead
      dsBefore = file.computeFileSize(true, true) * removedRepl;
    } else if (removedRepl > liveRepl) {
      // rescale the consumed space to the higher snapshot replication
      dsBefore = dsBefore / file.getBlockReplication() * removedRepl;
    }
  }

  collectBlocksAndClear(file, collectedBlocks, removedINodes);

  final long dsAfter = file.diskspaceConsumed();
  return Quota.Counts.newInstance(0, dsBefore - dsAfter);
}
/**
* If some blocks at the end of the block list no longer belong to
* any inode, collect them and update the block list.
*/
void collectBlocksAndClear(final BlocksMapUpdateInfo info,
final List<INode> removedINodes) {
private void collectBlocksAndClear(final INodeFile file,
final BlocksMapUpdateInfo info, final List<INode> removedINodes) {
// check if everything is deleted.
if (isCurrentFileDeleted() && getDiffs().asList().isEmpty()) {
destroyAndCollectBlocks(info, removedINodes);
file.destroyAndCollectBlocks(info, removedINodes);
return;
}
// find max file size.
final long max;
if (isCurrentFileDeleted()) {
final FileDiff last = getDiffs().getLast();
max = last == null? 0: last.getFileSize();
} else {
max = computeFileSize();
max = file.computeFileSize();
}
collectBlocksBeyondMax(max, info);
collectBlocksBeyondMax(file, max, info);
}
private void collectBlocksBeyondMax(final long max,
private void collectBlocksBeyondMax(final INodeFile file, final long max,
final BlocksMapUpdateInfo collectedBlocks) {
final BlockInfo[] oldBlocks = getBlocks();
final BlockInfo[] oldBlocks = file.getBlocks();
if (oldBlocks != null) {
//find the minimum n such that the size of the first n blocks > max
int n = 0;
@ -164,7 +171,7 @@ private void collectBlocksBeyondMax(final long max,
}
// set new blocks
setBlocks(newBlocks);
file.setBlocks(newBlocks);
// collect the blocks beyond max.
if (collectedBlocks != null) {
@ -175,24 +182,4 @@ private void collectBlocksBeyondMax(final long max,
}
}
}
/**
 * Collects the blocks of this file that are no longer needed after
 * {@code removed} has been dropped and returns the disk-space delta.
 *
 * @param removed the diff that was removed
 * @param collectedBlocks receives the blocks that can be removed
 * @param removedINodes receives the inodes that can be removed
 * @return counts created with 0 and the disk space consumed before minus
 *         the disk space consumed after the collection
 */
Quota.Counts updateQuotaAndCollectBlocks(FileDiff removed,
    BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes) {
  long dsBefore = this.diskspaceConsumed();
  if (removed.snapshotINode != null) {
    final short removedRepl = removed.snapshotINode.getFileReplication();
    final short liveRepl = getBlockReplication();
    if (liveRepl == 0) {
      dsBefore = computeFileSize(true, true) * removedRepl;
    } else if (removedRepl > liveRepl) {
      dsBefore = dsBefore / getBlockReplication() * removedRepl;
    }
  }

  this.collectBlocksAndClear(collectedBlocks, removedINodes);

  final long dsAfter = diskspaceConsumed();
  return Quota.Counts.newInstance(0, dsBefore - dsAfter);
}
}

View File

@ -34,9 +34,9 @@
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport;
import org.apache.hadoop.hdfs.protocol.SnapshotException;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffReportEntry;
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffType;
import org.apache.hadoop.hdfs.protocol.SnapshotException;
import org.apache.hadoop.hdfs.server.namenode.Content;
import org.apache.hadoop.hdfs.server.namenode.ContentSummaryComputationContext;
import org.apache.hadoop.hdfs.server.namenode.INode;
@ -432,8 +432,8 @@ private void computeDiffRecursively(INode node, List<byte[]> parentPath,
parentPath.remove(parentPath.size() - 1);
}
}
} else if (node.isFile() && node.asFile() instanceof INodeFileWithSnapshot) {
INodeFileWithSnapshot file = (INodeFileWithSnapshot) node.asFile();
} else if (node.isFile() && node.asFile().isWithSnapshot()) {
INodeFile file = node.asFile();
Snapshot earlierSnapshot = diffReport.isFromEarlier() ? diffReport.from
: diffReport.to;
Snapshot laterSnapshot = diffReport.isFromEarlier() ? diffReport.to

View File

@ -37,6 +37,7 @@
import org.apache.hadoop.hdfs.server.namenode.INode;
import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryAttributes;
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
import org.apache.hadoop.hdfs.server.namenode.INodeMap;
import org.apache.hadoop.hdfs.server.namenode.INodeReference;
import org.apache.hadoop.hdfs.server.namenode.Quota;
@ -803,10 +804,9 @@ private static Quota.Counts cleanDeletedINode(INode inode,
}
// For DstReference node, since the node is not in the created list of
// prior, we should treat it as regular file/dir
} else if (topNode.isFile()
&& topNode.asFile() instanceof INodeFileWithSnapshot) {
INodeFileWithSnapshot fs = (INodeFileWithSnapshot) topNode.asFile();
counts.add(fs.getDiffs().deleteSnapshotDiff(post, prior, fs,
} else if (topNode.isFile() && topNode.asFile().isWithSnapshot()) {
INodeFile file = topNode.asFile();
counts.add(file.getDiffs().deleteSnapshotDiff(post, prior, file,
collectedBlocks, removedINodes, countDiffChange));
} else if (topNode.isDirectory()) {
INodeDirectory dir = topNode.asDirectory();

View File

@ -97,8 +97,7 @@ public static void saveDirectoryDiffList(final INodeDirectory dir,
public static void saveFileDiffList(final INodeFile file,
final DataOutput out) throws IOException {
saveINodeDiffs(file instanceof INodeFileWithSnapshot?
((INodeFileWithSnapshot) file).getDiffs(): null, out, null);
saveINodeDiffs(file.getDiffs(), out, null);
}
public static FileDiffList loadFileDiffList(DataInput in,

View File

@ -20,6 +20,7 @@
import java.io.File;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.URL;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Collections;
@ -47,9 +48,9 @@
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.SnapshotException;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.hdfs.protocol.SnapshotException;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.TransferFsImage;
import org.apache.hadoop.ipc.RPC;
@ -547,8 +548,10 @@ public int setBalancerBandwidth(String[] argv, int idx) throws IOException {
* @throws IOException
*/
public int fetchImage(final String[] argv, final int idx) throws IOException {
final String infoServer = DFSUtil.getInfoServer(
HAUtil.getAddressOfActive(getDFS()), getConf(), false);
Configuration conf = getConf();
final URL infoServer = DFSUtil.getInfoServer(
HAUtil.getAddressOfActive(getDFS()), conf,
DFSUtil.getHttpClientScheme(conf)).toURL();
SecurityUtil.doAsCurrentUser(new PrivilegedExceptionAction<Void>() {
@Override
public Void run() throws Exception {

View File

@ -22,6 +22,7 @@
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
@ -37,7 +38,6 @@
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck;
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
import org.apache.hadoop.http.HttpConfig;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.util.StringUtils;
@ -227,7 +227,7 @@ private Integer listCorruptFileBlocks(String dir, String baseUrl)
* @return Returns http address or null if failure.
* @throws IOException if we can't determine the active NN address
*/
private String getCurrentNamenodeAddress() throws IOException {
private URI getCurrentNamenodeAddress() throws IOException {
//String nnAddress = null;
Configuration conf = getConf();
@ -245,19 +245,21 @@ private String getCurrentNamenodeAddress() throws IOException {
return null;
}
return DFSUtil.getInfoServer(HAUtil.getAddressOfActive(fs), conf, false);
return DFSUtil.getInfoServer(HAUtil.getAddressOfActive(fs), conf,
DFSUtil.getHttpClientScheme(conf));
}
private int doWork(final String[] args) throws IOException {
final StringBuilder url = new StringBuilder(HttpConfig.getSchemePrefix());
final StringBuilder url = new StringBuilder();
String namenodeAddress = getCurrentNamenodeAddress();
URI namenodeAddress = getCurrentNamenodeAddress();
if (namenodeAddress == null) {
//Error message already output in {@link #getCurrentNamenodeAddress()}
System.err.println("DFSck exiting.");
return 0;
}
url.append(namenodeAddress);
url.append(namenodeAddress.toString());
System.err.println("Connecting to namenode via " + url.toString());
url.append("/fsck?ugi=").append(ugi.getShortUserName());

View File

@ -100,6 +100,18 @@ void start() throws IOException {}
/**
 * Writes the report through {@link #output()} and then delegates to the
 * superclass. The report is written first, ahead of super.finish().
 *
 * @throws IOException propagated from output() or super.finish()
 */
@Override
void finish() throws IOException {
output();
super.finish();
}
/**
 * Abnormal-termination counterpart of finish(): prints a notice to standard
 * out, still writes the report through {@link #output()}, and then
 * delegates to the superclass.
 *
 * @throws IOException propagated from output() or super.finishAbnormally()
 */
@Override
void finishAbnormally() throws IOException {
System.out.println("*** Image processing finished abnormally. Ending ***");
output();
super.finishAbnormally();
}
private void output() throws IOException {
// write the distribution into the output file
write("Size\tNumFiles\n");
for(int i = 0; i < distribution.length; i++)
@ -109,7 +121,6 @@ void finish() throws IOException {
System.out.println("totalBlocks = " + totalBlocks);
System.out.println("totalSpace = " + totalSpace);
System.out.println("maxFileSize = " + maxFileSize);
super.finish();
}
@Override

View File

@ -129,7 +129,7 @@ class ImageLoaderCurrent implements ImageLoader {
-40, -41, -42, -43, -44, -45, -46, -47, -48, -49 };
private int imageVersion = 0;
private final Map<Long, String> subtreeMap = new HashMap<Long, String>();
private final Map<Long, Boolean> subtreeMap = new HashMap<Long, Boolean>();
private final Map<Long, String> dirNodeMap = new HashMap<Long, String>();
/* (non-Javadoc)
@ -500,11 +500,15 @@ private void processDirectoryWithSnapshot(DataInputStream in, ImageVisitor v,
// 1. load dir node id
long inodeId = in.readLong();
String dirName = dirNodeMap.get(inodeId);
String oldValue = subtreeMap.put(inodeId, dirName);
if (oldValue != null) { // the subtree has been visited
return;
}
String dirName = dirNodeMap.remove(inodeId);
Boolean visitedRef = subtreeMap.get(inodeId);
if (visitedRef != null) {
if (visitedRef.booleanValue()) { // the subtree has been visited
return;
} else { // first time to visit
subtreeMap.put(inodeId, true);
}
} // else the dir is not linked by a RefNode, thus cannot be revisited
// 2. load possible snapshots
processSnapshots(in, v, dirName);
@ -695,6 +699,8 @@ private void processINode(DataInputStream in, ImageVisitor v,
if (numBlocks >= 0) { // File
if (supportSnapshot) {
// make sure subtreeMap only contains entry for directory
subtreeMap.remove(inodeId);
// process file diffs
processFileDiffList(in, v, parentName);
if (isSnapshotCopy) {
@ -738,6 +744,11 @@ private void processINode(DataInputStream in, ImageVisitor v,
final boolean firstReferred = in.readBoolean();
if (firstReferred) {
// if a subtree is linked by multiple "parents", the corresponding dir
// must be referred by a reference node. we put the reference node into
// the subtreeMap here and let its value be false. when we later visit
// the subtree for the first time, we change the value to true.
subtreeMap.put(inodeId, false);
v.visitEnclosingElement(ImageElement.SNAPSHOT_REF_INODE);
processINode(in, v, skipBlocks, parentName, isSnapshotCopy);
v.leaveEnclosingElement(); // referred inode

View File

@ -157,6 +157,8 @@ public synchronized void initialize(URI uri, Configuration conf
) throws IOException {
super.initialize(uri, conf);
setConf(conf);
/** set user pattern based on configuration file */
UserParam.setUserPattern(conf.get(DFSConfigKeys.DFS_WEBHDFS_USER_PATTERN_KEY, DFSConfigKeys.DFS_WEBHDFS_USER_PATTERN_DEFAULT));
connectionFactory = URLConnectionFactory
.newDefaultURLConnectionFactory(conf);
initializeTokenAspect();

View File

@ -17,8 +17,10 @@
*/
package org.apache.hadoop.hdfs.web.resources;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_WEBHDFS_USER_PATTERN_DEFAULT;
import org.apache.hadoop.security.UserGroupInformation;
import com.google.common.annotations.VisibleForTesting;
import java.text.MessageFormat;
import java.util.regex.Pattern;
@ -29,8 +31,21 @@ public class UserParam extends StringParam {
/** Default parameter value. */
public static final String DEFAULT = "";
private static final Domain DOMAIN = new Domain(NAME,
Pattern.compile("^[A-Za-z_][A-Za-z0-9._-]*[$]?$"));
private static Domain domain = new Domain(NAME, Pattern.compile(DFS_WEBHDFS_USER_PATTERN_DEFAULT));
/**
 * @return the static domain currently used to validate user names; exposed
 *         for tests
 */
@VisibleForTesting
public static Domain getUserPatternDomain() {
return domain;
}
/**
 * Replaces the static validation domain with {@code dm}; exposed for tests.
 * The value is stored as-is, without any null check.
 *
 * @param dm the domain to use from now on
 */
@VisibleForTesting
public static void setUserPatternDomain(Domain dm) {
domain = dm;
}
/**
 * Replaces the static validation domain with one built from
 * {@code pattern}.
 *
 * @param pattern the regular expression for the new domain; must be a
 *          valid Java regex
 * @throws java.util.regex.PatternSyntaxException if {@code pattern} cannot
 *           be compiled
 */
public static void setUserPattern(String pattern) {
  final Pattern compiled = Pattern.compile(pattern);
  domain = new Domain(NAME, compiled);
}
private static String validateLength(String str) {
if (str == null) {
@ -50,7 +65,7 @@ private static String validateLength(String str) {
* @param str a string representation of the parameter value.
*/
public UserParam(final String str) {
super(DOMAIN, str == null || str.equals(DEFAULT)? null : validateLength(str));
super(domain, str == null || str.equals(DEFAULT)? null : validateLength(str));
}
/**
@ -64,4 +79,4 @@ public UserParam(final UserGroupInformation ugi) {
public String getName() {
return NAME;
}
}
}

View File

@ -1593,4 +1593,12 @@
</description>
</property>
<property>
<name>dfs.webhdfs.user.provider.user.pattern</name>
<value>^[A-Za-z_][A-Za-z0-9._-]*[$]?$</value>
<description>
Valid pattern for user and group names for webhdfs; it must be a valid Java regex.
</description>
</property>
</configuration>

View File

@ -19,7 +19,6 @@
package org.apache.hadoop.hdfs;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY;
@ -431,20 +430,22 @@ public void testEmptyConf() {
}
@Test
public void testGetInfoServer() throws IOException {
public void testGetInfoServer() throws IOException, URISyntaxException {
HdfsConfiguration conf = new HdfsConfiguration();
conf.set(HADOOP_SECURITY_AUTHENTICATION, "kerberos");
UserGroupInformation.setConfiguration(conf);
String httpsport = DFSUtil.getInfoServer(null, conf, true);
assertEquals("0.0.0.0:"+DFS_NAMENODE_HTTPS_PORT_DEFAULT, httpsport);
URI httpsport = DFSUtil.getInfoServer(null, conf, "https");
assertEquals(new URI("https", null, "0.0.0.0",
DFS_NAMENODE_HTTPS_PORT_DEFAULT, null, null, null), httpsport);
String httpport = DFSUtil.getInfoServer(null, conf, false);
assertEquals("0.0.0.0:"+DFS_NAMENODE_HTTP_PORT_DEFAULT, httpport);
String httpAddress = DFSUtil.getInfoServer(new InetSocketAddress(
"localhost", 8020), conf, false);
assertEquals("localhost:" + DFS_NAMENODE_HTTP_PORT_DEFAULT, httpAddress);
URI httpport = DFSUtil.getInfoServer(null, conf, "http");
assertEquals(new URI("http", null, "0.0.0.0",
DFS_NAMENODE_HTTP_PORT_DEFAULT, null, null, null), httpport);
URI httpAddress = DFSUtil.getInfoServer(new InetSocketAddress(
"localhost", 8020), conf, "http");
assertEquals(
URI.create("http://localhost:" + DFS_NAMENODE_HTTP_PORT_DEFAULT),
httpAddress);
}
@Test

View File

@ -42,6 +42,10 @@ static public void setupCluster() throws Exception {
new File(sockDir.getDir(),
"TestParallelShortCircuitReadUnCached._PORT.sock").getAbsolutePath());
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
// Enabling data transfer encryption should have no effect when using
// short-circuit local reads. This is a regression test for HDFS-5353.
conf.setBoolean(DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY, true);
conf.setBoolean(DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY, true);
conf.setBoolean(DFSConfigKeys.
DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
conf.setBoolean(DFSConfigKeys.

View File

@ -140,6 +140,11 @@ public boolean equals(Object o) {
public int hashCode() {
  // XOR with 1 flips the lowest bit of the id hash when hasDomain is set,
  // and leaves the hash untouched otherwise.
  final int domainBit = hasDomain ? 1 : 0;
  return dnId.hashCode() ^ domainBit;
}
@Override
public boolean hasSecureChannel() {
// This implementation never reports a secure channel.
return false;
}
}
@Test

View File

@ -97,7 +97,6 @@ void testRestartDfs(boolean useFlush) throws Exception {
conf.setInt(
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
0);
conf.setBoolean(DFSConfigKeys.DFS_PERSIST_BLOCKS_KEY, true);
MiniDFSCluster cluster = null;
long len = 0;
@ -157,7 +156,6 @@ public void testRestartDfsWithAbandonedBlock() throws Exception {
conf.setInt(
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
0);
conf.setBoolean(DFSConfigKeys.DFS_PERSIST_BLOCKS_KEY, true);
MiniDFSCluster cluster = null;
long len = 0;
@ -219,7 +217,6 @@ public void testRestartWithPartialBlockHflushed() throws IOException {
conf.setInt(
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
0);
conf.setBoolean(DFSConfigKeys.DFS_PERSIST_BLOCKS_KEY, true);
MiniDFSCluster cluster = null;
FSDataOutputStream stream;
@ -269,7 +266,6 @@ public void testRestartWithAppend() throws IOException {
conf.setInt(
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
0);
conf.setBoolean(DFSConfigKeys.DFS_PERSIST_BLOCKS_KEY, true);
MiniDFSCluster cluster = null;
FSDataOutputStream stream;

View File

@ -434,7 +434,6 @@ void testCheckpoint(StartupOption op) throws Exception {
public void testCanReadData() throws IOException {
Path file1 = new Path("/fileToRead.dat");
Configuration conf = new HdfsConfiguration();
conf.setBoolean(DFSConfigKeys.DFS_PERSIST_BLOCKS_KEY, true);
MiniDFSCluster cluster = null;
FileSystem fileSys = null;
BackupNode backup = null;

View File

@ -34,6 +34,7 @@
import java.lang.management.ManagementFactory;
import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
@ -71,7 +72,6 @@
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
import org.apache.hadoop.hdfs.tools.DFSAdmin;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
@ -218,6 +218,7 @@ public void testWriteTransactionIdHandlesIOE() throws Exception {
assertTrue("Removed directory wasn't what was expected",
listRsd.size() > 0 && listRsd.get(listRsd.size() - 1).getRoot().
toString().indexOf("storageDirToCheck") != -1);
nnStorage.close();
}
/*
@ -1947,8 +1948,9 @@ public void testNamespaceVerifiedOnFileTransfer() throws IOException {
.format(true).build();
NamenodeProtocols nn = cluster.getNameNodeRpc();
String fsName = NetUtils.getHostPortString(
cluster.getNameNode().getHttpAddress());
URL fsName = DFSUtil.getInfoServer(
cluster.getNameNode().getServiceRpcAddress(), conf,
DFSUtil.getHttpClientScheme(conf)).toURL();
// Make a finalized log on the server side.
nn.rollEditLog();
@ -1980,8 +1982,7 @@ public void testNamespaceVerifiedOnFileTransfer() throws IOException {
}
try {
InetSocketAddress fakeAddr = new InetSocketAddress(1);
TransferFsImage.uploadImageFromStorage(fsName, fakeAddr, dstImage, 0);
TransferFsImage.uploadImageFromStorage(fsName, new URL("http://localhost:1234"), dstImage, 0);
fail("Storage info was not verified");
} catch (IOException ioe) {
String msg = StringUtils.stringifyException(ioe);

View File

@ -32,7 +32,6 @@
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectoryWithSnapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeFileWithSnapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.junit.AfterClass;
import org.junit.Assert;
@ -239,7 +238,7 @@ public void testSnapshotPathINodes() throws Exception {
// The last INode should be the INode for sub1
final INode last = nodesInPath.getLastINode();
assertEquals(last.getFullPathName(), sub1.toString());
assertFalse(last instanceof INodeFileWithSnapshot);
assertFalse(last instanceof INodeFile);
String[] invalidPathComponent = {"invalidDir", "foo", ".snapshot", "bar"};
Path invalidPath = new Path(invalidPathComponent[0]);
@ -287,7 +286,7 @@ public void testSnapshotPathINodesAfterDeletion() throws Exception {
// Check the INode for file1 (snapshot file)
final INode inode = inodes[inodes.length - 1];
assertEquals(file1.getName(), inode.getLocalName());
assertEquals(INodeFileWithSnapshot.class, inode.getClass());
assertTrue(inode.asFile().isWithSnapshot());
}
// Check the INodes for path /TestSnapshot/sub1/file1
@ -391,6 +390,8 @@ public void testSnapshotPathINodesAfterModification() throws Exception {
// The last INode should be associated with file1
assertEquals(inodes[components.length - 1].getFullPathName(),
file1.toString());
// record the modification time of the inode
final long modTime = inodes[inodes.length - 1].getModificationTime();
// Create a snapshot for the dir, and check the inodes for the path
// pointing to a snapshot file
@ -414,10 +415,10 @@ public void testSnapshotPathINodesAfterModification() throws Exception {
// Check the INode for snapshot of file1
INode snapshotFileNode = ssInodes[ssInodes.length - 1];
assertEquals(snapshotFileNode.getLocalName(), file1.getName());
assertTrue(snapshotFileNode instanceof INodeFileWithSnapshot);
assertTrue(snapshotFileNode.asFile().isWithSnapshot());
// The modification time of the snapshot INode should be the same as that of
// the original INode before modification
assertEquals(inodes[inodes.length - 1].getModificationTime(),
assertEquals(modTime,
snapshotFileNode.getModificationTime(ssNodesInPath.getPathSnapshot()));
// Check the INode for /TestSnapshot/sub1/file1 again
@ -432,7 +433,6 @@ public void testSnapshotPathINodesAfterModification() throws Exception {
final int last = components.length - 1;
assertEquals(newInodes[last].getFullPathName(), file1.toString());
// The modification time of the INode for file3 should have been changed
Assert.assertFalse(inodes[last].getModificationTime()
== newInodes[last].getModificationTime());
Assert.assertFalse(modTime == newInodes[last].getModificationTime());
}
}

View File

@ -34,11 +34,11 @@
import javax.servlet.http.HttpServletResponse;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.http.HttpServer;
import org.apache.hadoop.http.HttpServerFunctionalTest;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.test.PathUtils;
import org.apache.hadoop.util.StringUtils;
import org.junit.Test;
@ -66,8 +66,9 @@ public void testClientSideException() throws IOException {
new File("/xxxxx-does-not-exist/blah"));
try {
String fsName = NetUtils.getHostPortString(
cluster.getNameNode().getHttpAddress());
URL fsName = DFSUtil.getInfoServer(
cluster.getNameNode().getServiceRpcAddress(), conf,
DFSUtil.getHttpClientScheme(conf)).toURL();
String id = "getimage=1&txid=0";
TransferFsImage.getFileClient(fsName, id, localPath, mockStorage, false);
@ -98,8 +99,10 @@ public void testClientSideExceptionOnJustOneDir() throws IOException {
);
try {
String fsName = NetUtils.getHostPortString(
cluster.getNameNode().getHttpAddress());
URL fsName = DFSUtil.getInfoServer(
cluster.getNameNode().getServiceRpcAddress(), conf,
DFSUtil.getHttpClientScheme(conf)).toURL();
String id = "getimage=1&txid=0";
TransferFsImage.getFileClient(fsName, id, localPaths, mockStorage, false);
@ -123,7 +126,7 @@ public void testImageTransferTimeout() throws Exception {
URL serverURL = HttpServerFunctionalTest.getServerURL(testServer);
TransferFsImage.timeout = 2000;
try {
TransferFsImage.getFileClient(serverURL.getAuthority(), "txid=1", null,
TransferFsImage.getFileClient(serverURL, "txid=1", null,
null, false);
fail("TransferImage Should fail with timeout");
} catch (SocketTimeoutException e) {

View File

@ -24,6 +24,7 @@
import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.util.Collection;
import org.apache.hadoop.conf.Configuration;
@ -86,7 +87,8 @@ public void testGetOtherNNHttpAddress() throws IOException {
// 0.0.0.0, it should substitute the address from the RPC configuration
// above.
StandbyCheckpointer checkpointer = new StandbyCheckpointer(conf, fsn);
assertEquals("1.2.3.2:" + DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT,
assertEquals(new URL("http", "1.2.3.2",
DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT, ""),
checkpointer.getActiveNNAddress());
}

View File

@ -176,7 +176,7 @@ public void testSnapshotWhileAppending() throws Exception {
dirNode = (INodeDirectorySnapshottable) fsdir.getINode(dir.toString());
last = dirNode.getDiffs().getLast();
Snapshot s1 = last.snapshot;
assertTrue(fileNode instanceof INodeFileWithSnapshot);
assertTrue(fileNode.isWithSnapshot());
assertEquals(BLOCKSIZE * 3, fileNode.computeFileSize(s1));
// 4. modify file --> append without closing stream --> take snapshot -->

View File

@ -403,8 +403,7 @@ public void testRenameFileAcrossSnapshottableDirs() throws Exception {
final Path foo_s3 = SnapshotTestHelper.getSnapshotPath(sdir1, "s3",
"foo");
assertFalse(hdfs.exists(foo_s3));
INodeFileWithSnapshot sfoo = (INodeFileWithSnapshot) fsdir.getINode(
newfoo.toString()).asFile();
INodeFile sfoo = fsdir.getINode(newfoo.toString()).asFile();
assertEquals("s2", sfoo.getDiffs().getLastSnapshot().getRoot()
.getLocalName());
}
@ -604,8 +603,7 @@ public void testRenameFileAndDeleteSnapshot() throws Exception {
status = hdfs.getFileStatus(foo_s2);
assertEquals(REPL, status.getReplication());
INodeFileWithSnapshot snode = (INodeFileWithSnapshot) fsdir.getINode(
newfoo.toString()).asFile();
INodeFile snode = fsdir.getINode(newfoo.toString()).asFile();
assertEquals(1, snode.getDiffs().asList().size());
assertEquals("s2", snode.getDiffs().getLastSnapshot().getRoot()
.getLocalName());
@ -763,8 +761,7 @@ public void testRenameMoreThanOnceAcrossSnapDirs() throws Exception {
.asDirectory();
assertEquals(1, foo.getDiffs().asList().size());
assertEquals("s1", foo.getLastSnapshot().getRoot().getLocalName());
INodeFileWithSnapshot bar1 = (INodeFileWithSnapshot) fsdir.getINode4Write(
bar1_dir1.toString()).asFile();
INodeFile bar1 = fsdir.getINode4Write(bar1_dir1.toString()).asFile();
assertEquals(1, bar1.getDiffs().asList().size());
assertEquals("s1", bar1.getDiffs().getLastSnapshot().getRoot()
.getLocalName());
@ -774,7 +771,7 @@ public void testRenameMoreThanOnceAcrossSnapDirs() throws Exception {
INodeReference.WithCount barWithCount = (WithCount) barRef
.getReferredINode();
assertEquals(2, barWithCount.getReferenceCount());
INodeFileWithSnapshot bar = (INodeFileWithSnapshot) barWithCount.asFile();
INodeFile bar = barWithCount.asFile();
assertEquals(1, bar.getDiffs().asList().size());
assertEquals("s1", bar.getDiffs().getLastSnapshot().getRoot()
.getLocalName());
@ -984,8 +981,7 @@ public void testRenameMoreThanOnceAcrossSnapDirs_2() throws Exception {
assertEquals("s333", fooDiffs.get(2).snapshot.getRoot().getLocalName());
assertEquals("s22", fooDiffs.get(1).snapshot.getRoot().getLocalName());
assertEquals("s1", fooDiffs.get(0).snapshot.getRoot().getLocalName());
INodeFileWithSnapshot bar1 = (INodeFileWithSnapshot) fsdir.getINode4Write(
bar1_dir1.toString()).asFile();
INodeFile bar1 = fsdir.getINode4Write(bar1_dir1.toString()).asFile();
List<FileDiff> bar1Diffs = bar1.getDiffs().asList();
assertEquals(3, bar1Diffs.size());
assertEquals("s333", bar1Diffs.get(2).snapshot.getRoot().getLocalName());
@ -997,7 +993,7 @@ public void testRenameMoreThanOnceAcrossSnapDirs_2() throws Exception {
INodeReference.WithCount barWithCount = (WithCount) barRef.getReferredINode();
// 5 references: s1, s22, s333, s2222, current tree of sdir1
assertEquals(5, barWithCount.getReferenceCount());
INodeFileWithSnapshot bar = (INodeFileWithSnapshot) barWithCount.asFile();
INodeFile bar = barWithCount.asFile();
List<FileDiff> barDiffs = bar.getDiffs().asList();
assertEquals(4, barDiffs.size());
assertEquals("s2222", barDiffs.get(3).snapshot.getRoot().getLocalName());
@ -1047,7 +1043,7 @@ public void testRenameMoreThanOnceAcrossSnapDirs_2() throws Exception {
barRef = fsdir.getINode(bar_s2222.toString()).asReference();
barWithCount = (WithCount) barRef.getReferredINode();
assertEquals(4, barWithCount.getReferenceCount());
bar = (INodeFileWithSnapshot) barWithCount.asFile();
bar = barWithCount.asFile();
barDiffs = bar.getDiffs().asList();
assertEquals(4, barDiffs.size());
assertEquals("s2222", barDiffs.get(3).snapshot.getRoot().getLocalName());
@ -1229,7 +1225,7 @@ public void testRenameAndAppend() throws Exception {
fooRef = fsdir.getINode4Write(foo2.toString());
assertTrue(fooRef instanceof INodeReference.DstReference);
INodeFile fooNode = fooRef.asFile();
assertTrue(fooNode instanceof INodeFileWithSnapshot);
assertTrue(fooNode.isWithSnapshot());
assertTrue(fooNode.isUnderConstruction());
} finally {
if (out != null) {
@ -1240,7 +1236,7 @@ public void testRenameAndAppend() throws Exception {
fooRef = fsdir.getINode4Write(foo2.toString());
assertTrue(fooRef instanceof INodeReference.DstReference);
INodeFile fooNode = fooRef.asFile();
assertTrue(fooNode instanceof INodeFileWithSnapshot);
assertTrue(fooNode.isWithSnapshot());
assertFalse(fooNode.isUnderConstruction());
restartClusterAndCheckImage(true);
@ -1715,8 +1711,7 @@ public void testRenameUndo_7() throws Exception {
assertTrue(diff.getChildrenDiff().getList(ListType.CREATED).isEmpty());
// bar was converted to filewithsnapshot while renaming
INodeFileWithSnapshot barNode = (INodeFileWithSnapshot) fsdir
.getINode4Write(bar.toString());
INodeFile barNode = fsdir.getINode4Write(bar.toString()).asFile();
assertSame(barNode, children.get(0));
assertSame(fooNode, barNode.getParent());
List<FileDiff> barDiffList = barNode.getDiffs().asList();

View File

@ -19,6 +19,7 @@
import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
@ -167,7 +168,8 @@ public void testDeletionWithSnapshots() throws Exception {
Assert.assertSame(INodeFile.class, f1.getClass());
hdfs.setReplication(file1, (short)2);
f1 = assertBlockCollection(file1.toString(), 2, fsdir, blockmanager);
Assert.assertSame(INodeFileWithSnapshot.class, f1.getClass());
assertTrue(f1.isWithSnapshot());
assertFalse(f1.isUnderConstruction());
}
// Check the block information for file0

View File

@ -277,10 +277,10 @@ public void testDeleteCurrentFileDirectory() throws Exception {
TestSnapshotBlocksMap.assertBlockCollection(new Path(snapshotNoChangeDir,
noChangeFileSCopy.getLocalName()).toString(), 1, fsdir, blockmanager);
INodeFileWithSnapshot metaChangeFile2SCopy =
(INodeFileWithSnapshot) children.get(0);
INodeFile metaChangeFile2SCopy = children.get(0).asFile();
assertEquals(metaChangeFile2.getName(), metaChangeFile2SCopy.getLocalName());
assertEquals(INodeFileWithSnapshot.class, metaChangeFile2SCopy.getClass());
assertTrue(metaChangeFile2SCopy.isWithSnapshot());
assertFalse(metaChangeFile2SCopy.isUnderConstruction());
TestSnapshotBlocksMap.assertBlockCollection(new Path(snapshotNoChangeDir,
metaChangeFile2SCopy.getLocalName()).toString(), 1, fsdir, blockmanager);
@ -338,8 +338,9 @@ public void testDeleteCurrentFileDirectory() throws Exception {
INode child = children.get(0);
assertEquals(child.getLocalName(), metaChangeFile1.getName());
// check snapshot copy of metaChangeFile1
assertEquals(INodeFileWithSnapshot.class, child.getClass());
INodeFileWithSnapshot metaChangeFile1SCopy = (INodeFileWithSnapshot) child;
INodeFile metaChangeFile1SCopy = child.asFile();
assertTrue(metaChangeFile1SCopy.isWithSnapshot());
assertFalse(metaChangeFile1SCopy.isUnderConstruction());
assertEquals(REPLICATION_1,
metaChangeFile1SCopy.getFileReplication(null));
assertEquals(REPLICATION_1,

View File

@ -261,6 +261,34 @@ public Void run() throws IOException, URISyntaxException {
}
}
/**
 * Runs a WebHDFS mkdirs as the all-digit user "123" on a one-datanode
 * mini cluster whose user-name pattern has been overridden so that a name
 * may begin with a digit, and asserts that the mkdirs succeeds.
 */
@Test(timeout=300000)
public void testNumericalUserName() throws Exception {
  final Configuration conf = WebHdfsTestUtil.createConf();
  // Override the user-name pattern: the first character may be a digit.
  conf.set(DFSConfigKeys.DFS_WEBHDFS_USER_PATTERN_KEY, "^[A-Za-z0-9_][A-Za-z0-9._-]*[$]?$");
  final MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  try {
    cluster.waitActive();

    // Grant rwx to everybody on "/" so the test user is allowed to create
    // a directory directly under the root.
    final FileSystem rootFs =
        WebHdfsTestUtil.getWebHdfsFileSystem(conf, WebHdfsFileSystem.SCHEME);
    final Path root = new Path("/");
    final FsPermission openToAll =
        new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL);
    rootFs.setPermission(root, openToAll);

    final UserGroupInformation numericUser =
        UserGroupInformation.createUserForTesting("123", new String[]{"my-group"});
    final PrivilegedExceptionAction<Void> makeDir =
        new PrivilegedExceptionAction<Void>() {
          @Override
          public Void run() throws IOException, URISyntaxException {
            FileSystem userFs = WebHdfsTestUtil.getWebHdfsFileSystem(conf,
                WebHdfsFileSystem.SCHEME);
            Path target = new Path("/my-dir");
            Assert.assertTrue(userFs.mkdirs(target));
            return null;
          }
        };
    numericUser.doAs(makeDir);
  } finally {
    // Always tear the mini cluster down, even when an assertion fails.
    cluster.shutdown();
  }
}
/**
* WebHdfs should be enabled by default after HDFS-5532
*

View File

@ -285,4 +285,19 @@ public void testConcatSourcesParam() {
Assert.assertEquals(expected, computed.getValue());
}
}
/**
 * Verifies that, after the user-name pattern is replaced with one that
 * allows a leading digit, {@code UserParam} can be constructed for the
 * names "1x" and "123" and yields a non-null value for each.
 *
 * The pattern domain is static state on {@code UserParam}, so the previous
 * domain is restored in a {@code finally} block; otherwise a failing
 * assertion (or an exception from the constructor) would leave the modified
 * pattern in place for subsequent tests.
 */
@Test
public void testUserNameOkAfterResettingPattern() {
  UserParam.Domain oldDomain = UserParam.getUserPatternDomain();
  try {
    String newPattern = "^[A-Za-z0-9_][A-Za-z0-9._-]*[$]?$";
    UserParam.setUserPattern(newPattern);

    UserParam userParam = new UserParam("1x");
    assertNotNull(userParam.getValue());
    userParam = new UserParam("123");
    assertNotNull(userParam.getValue());
  } finally {
    // Put the original domain back regardless of the test outcome.
    UserParam.setUserPatternDomain(oldDomain);
  }
}
}

View File

@ -867,5 +867,10 @@ public ApplicationResourceUsageReport getAppResourceUsageReport(
ApplicationAttemptId appAttemptId) {
return scheduler.getAppResourceUsageReport(appAttemptId);
}
/**
 * Returns the application attempt ids for the named queue by forwarding
 * the call, unchanged, to the {@code scheduler} field.
 */
@Override
public List<ApplicationAttemptId> getAppsInQueue(String queue) {
  final List<ApplicationAttemptId> attemptsInQueue =
      scheduler.getAppsInQueue(queue);
  return attemptsInQueue;
}
}

View File

@ -141,6 +141,16 @@ Release 2.4.0 - UNRELEASED
YARN-1403. Separate out configuration loading from QueueManager in the Fair
Scheduler (Sandy Ryza)
YARN-1181. Augment MiniYARNCluster to support HA mode (Karthik Kambatla)
YARN-546. Allow disabling the Fair Scheduler event log (Sandy Ryza)
YARN-807. When querying apps by queue, iterating over all apps is
inefficient and limiting (Sandy Ryza)
YARN-1378. Implemented a cleaner of old finished applications from the RM
state-store. (Jian He via vinodkv)
OPTIMIZATIONS
BUG FIXES
@ -206,6 +216,12 @@ Release 2.4.0 - UNRELEASED
YARN-1454. Fixed test failure issue with TestRMRestart. (Karthik Kambatla
via vinodkv)
YARN-1450. Fixed test failure in TestUnmanagedAMLauncher by removing its
dependency on distributed-shell. (Binglin Chang via vinodkv)
YARN-1405. Fixed ResourceManager to not hang when init/start fails with an
exception w.r.t state-store. (Jian He via vinodkv)
Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES
@ -292,6 +308,9 @@ Release 2.3.0 - UNRELEASED
YARN-1407. RM Web UI and REST APIs should uniformly use
YarnApplicationState (Sandy Ryza)
YARN-1438. Ensure container diagnostics includes exception from container
launch. (stevel via acmurthy)
Release 2.2.0 - 2013-10-13
INCOMPATIBLE CHANGES

View File

@ -342,7 +342,16 @@ public class YarnConfiguration extends Configuration {
public static final String RM_MAX_COMPLETED_APPLICATIONS =
RM_PREFIX + "max-completed-applications";
public static final int DEFAULT_RM_MAX_COMPLETED_APPLICATIONS = 10000;
/**
* The maximum number of completed applications the RM state store keeps;
* defaults to DEFAULT_RM_MAX_COMPLETED_APPLICATIONS.
*/
public static final String RM_STATE_STORE_MAX_COMPLETED_APPLICATIONS =
RM_PREFIX + "state-store.max-completed-applications";
public static final int DEFAULT_RM_STATE_STORE_MAX_COMPLETED_APPLICATIONS =
DEFAULT_RM_MAX_COMPLETED_APPLICATIONS;
/** Default application name */
public static final String DEFAULT_APPLICATION_NAME = "N/A";

View File

@ -71,11 +71,6 @@
<artifactId>hadoop-mapreduce-client-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-applications-distributedshell</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-tests</artifactId>

View File

@ -32,7 +32,12 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.client.ClientRMProxy;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.MiniYARNCluster;
import org.junit.AfterClass;
@ -122,8 +127,6 @@ public void testDSShell() throws Exception {
LOG.fatal("JAVA_HOME not defined. Test not running.");
return;
}
// start dist-shell with 0 containers because container launch will fail if
// there are no dist cache resources.
String[] args = {
"--classpath",
classpath,
@ -132,9 +135,8 @@ public void testDSShell() throws Exception {
"--cmd",
javaHome
+ "/bin/java -Xmx512m "
+ "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster "
+ "--container_memory 128 --num_containers 1 --priority 0 "
+ "--shell_command " + (Shell.WINDOWS ? "dir" : "ls") };
+ TestUnmanagedAMLauncher.class.getCanonicalName()
+ " success" };
LOG.info("Initializing Launcher");
UnmanagedAMLauncher launcher = new UnmanagedAMLauncher(new Configuration(
@ -157,8 +159,6 @@ public void testDSShellError() throws Exception {
LOG.fatal("JAVA_HOME not defined. Test not running.");
return;
}
// remove shell command to make dist-shell fail in initialization itself
String[] args = {
"--classpath",
classpath,
@ -167,8 +167,8 @@ public void testDSShellError() throws Exception {
"--cmd",
javaHome
+ "/bin/java -Xmx512m "
+ "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster "
+ "--container_memory 128 --num_containers 1 --priority 0" };
+ TestUnmanagedAMLauncher.class.getCanonicalName()
+ " failure" };
LOG.info("Initializing Launcher");
UnmanagedAMLauncher launcher = new UnmanagedAMLauncher(new Configuration(
@ -185,4 +185,19 @@ public void testDSShellError() throws Exception {
}
}
// Entry point that lets this test class itself be launched as the AM
// process. The first argument selects the behaviour: "success" registers
// with the RM, pauses briefly, unregisters with a SUCCEEDED status and exits
// with code 0; any other value exits immediately with code 1.
public static void main(String[] args) throws Exception {
  final boolean shouldSucceed = args[0].equals("success");
  if (!shouldSucceed) {
    System.exit(1);
  }

  ApplicationMasterProtocol rmStub = ClientRMProxy.createRMProxy(conf,
      ApplicationMasterProtocol.class);

  RegisterApplicationMasterRequest registration =
      RegisterApplicationMasterRequest.newInstance(
          NetUtils.getHostname(), -1, "");
  rmStub.registerApplicationMaster(registration);

  Thread.sleep(1000);

  FinishApplicationMasterRequest completion =
      FinishApplicationMasterRequest.newInstance(
          FinalApplicationStatus.SUCCEEDED, "success", null);
  rmStub.finishApplicationMaster(completion);

  System.exit(0);
}
}

View File

@ -275,6 +275,21 @@
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore</value>
</property>
<property>
<description>The maximum number of completed applications RM state
store keeps, less than or equal to ${yarn.resourcemanager.max-completed-applications}.
By default, it equals ${yarn.resourcemanager.max-completed-applications}.
This ensures that the applications kept in the state store are consistent with
the applications remembered in RM memory.
Any values larger than ${yarn.resourcemanager.max-completed-applications} will
be reset to ${yarn.resourcemanager.max-completed-applications}.
Note that this value impacts the RM recovery performance. Typically,
a smaller value indicates better performance on RM recovery.
</description>
<name>yarn.resourcemanager.state-store.max-completed-applications</name>
<value>${yarn.resourcemanager.max-completed-applications}</value>
</property>
<property>
<description>Host:Port of the ZooKeeper server where RM state will
be stored. This must be supplied when using

View File

@ -213,7 +213,8 @@ public int launchContainer(Container container,
LOG.warn("Exception from container-launch with container ID: "
+ containerId + " and exit code: " + exitCode , e);
logOutput(shExec.getOutput());
String diagnostics = "Exception from container-launch: \n"
String diagnostics = "Exception from container-launch: "
+ e + "\n"
+ StringUtils.stringifyException(e) + "\n" + shExec.getOutput();
container.handle(new ContainerDiagnosticsUpdateEvent(containerId,
diagnostics));

View File

@ -24,7 +24,9 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
@ -67,6 +69,7 @@
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
@ -431,12 +434,51 @@ public GetApplicationsResponse getApplications(
LongRange start = request.getStartRange();
LongRange finish = request.getFinishRange();
List<ApplicationReport> reports = new ArrayList<ApplicationReport>();
long count = 0;
for (RMApp application : this.rmContext.getRMApps().values()) {
if (++count > limit) {
break;
final Map<ApplicationId, RMApp> apps = rmContext.getRMApps();
Iterator<RMApp> appsIter;
// If the query filters by queues, we can avoid considering apps outside
// of those queues by asking the scheduler for the apps in those queues.
if (queues != null && !queues.isEmpty()) {
// Construct an iterator over apps in given queues
// Collect list of lists to avoid copying all apps
final List<List<ApplicationAttemptId>> queueAppLists =
new ArrayList<List<ApplicationAttemptId>>();
for (String queue : queues) {
List<ApplicationAttemptId> appsInQueue = scheduler.getAppsInQueue(queue);
if (appsInQueue != null && !appsInQueue.isEmpty()) {
queueAppLists.add(appsInQueue);
}
}
appsIter = new Iterator<RMApp>() {
Iterator<List<ApplicationAttemptId>> appListIter = queueAppLists.iterator();
Iterator<ApplicationAttemptId> schedAppsIter;
@Override
public boolean hasNext() {
// Because queueAppLists has no empty lists, hasNext is whether the
// current list hasNext or whether there are any remaining lists
return (schedAppsIter != null && schedAppsIter.hasNext())
|| appListIter.hasNext();
}
@Override
public RMApp next() {
if (schedAppsIter == null || !schedAppsIter.hasNext()) {
schedAppsIter = appListIter.next().iterator();
}
return apps.get(schedAppsIter.next().getApplicationId());
}
@Override
public void remove() {
throw new UnsupportedOperationException("Remove not supported");
}
};
} else {
appsIter = apps.values().iterator();
}
List<ApplicationReport> reports = new ArrayList<ApplicationReport>();
while (appsIter.hasNext() && reports.size() < limit) {
RMApp application = appsIter.next();
if (applicationTypes != null && !applicationTypes.isEmpty()) {
String appTypeToMatch = caseSensitive
? application.getApplicationType()
@ -458,11 +500,6 @@ public GetApplicationsResponse getApplications(
continue;
}
if (queues != null && !queues.isEmpty() &&
!queues.contains(application.getQueue())) {
continue;
}
if (start != null && !start.containsLong(application.getStartTime())) {
continue;
}
@ -515,13 +552,12 @@ public GetQueueInfoResponse getQueueInfo(GetQueueInfoRequest request)
request.getRecursive());
List<ApplicationReport> appReports = EMPTY_APPS_REPORT;
if (request.getIncludeApplications()) {
Collection<RMApp> apps = this.rmContext.getRMApps().values();
appReports = new ArrayList<ApplicationReport>(
apps.size());
for (RMApp app : apps) {
if (app.getQueue().equals(queueInfo.getQueueName())) {
appReports.add(app.createAndGetApplicationReport(null, true));
}
List<ApplicationAttemptId> apps =
scheduler.getAppsInQueue(request.getQueueName());
appReports = new ArrayList<ApplicationReport>(apps.size());
for (ApplicationAttemptId app : apps) {
RMApp rmApp = rmContext.getRMApps().get(app.getApplicationId());
appReports.add(rmApp.createAndGetApplicationReport(null, true));
}
}
queueInfo.setApplications(appReports);

View File

@ -65,7 +65,9 @@ public class RMAppManager implements EventHandler<RMAppManagerEvent>,
private static final Log LOG = LogFactory.getLog(RMAppManager.class);
private int completedAppsMax = YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS;
private int maxCompletedAppsInMemory;
private int maxCompletedAppsInStateStore;
protected int completedAppsInStateStore = 0;
private LinkedList<ApplicationId> completedApps = new LinkedList<ApplicationId>();
private final RMContext rmContext;
@ -82,9 +84,16 @@ public RMAppManager(RMContext context,
this.masterService = masterService;
this.applicationACLsManager = applicationACLsManager;
this.conf = conf;
setCompletedAppsMax(conf.getInt(
this.maxCompletedAppsInMemory = conf.getInt(
YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS,
YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS));
YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
this.maxCompletedAppsInStateStore =
conf.getInt(
YarnConfiguration.RM_STATE_STORE_MAX_COMPLETED_APPLICATIONS,
YarnConfiguration.DEFAULT_RM_STATE_STORE_MAX_COMPLETED_APPLICATIONS);
if (this.maxCompletedAppsInStateStore > this.maxCompletedAppsInMemory) {
this.maxCompletedAppsInStateStore = this.maxCompletedAppsInMemory;
}
}
/**
@ -173,10 +182,6 @@ public void logApplicationSummary(ApplicationId appId) {
ApplicationSummary.logAppSummary(rmContext.getRMApps().get(appId));
}
protected synchronized void setCompletedAppsMax(int max) {
this.completedAppsMax = max;
}
protected synchronized int getCompletedAppsListSize() {
return this.completedApps.size();
}
@ -190,7 +195,8 @@ protected synchronized void finishApplication(ApplicationId applicationId) {
rmContext.getDelegationTokenRenewer().applicationFinished(applicationId);
}
completedApps.add(applicationId);
completedApps.add(applicationId);
completedAppsInStateStore++;
writeAuditLog(applicationId);
}
}
@ -229,15 +235,31 @@ protected void writeAuditLog(ApplicationId appId) {
* check to see if hit the limit for max # completed apps kept
*/
protected synchronized void checkAppNumCompletedLimit() {
while (completedApps.size() > this.completedAppsMax) {
ApplicationId removeId = completedApps.remove();
LOG.info("Application should be expired, max # apps"
+ " met. Removing app: " + removeId);
// check apps kept in state store.
while (completedAppsInStateStore > this.maxCompletedAppsInStateStore) {
ApplicationId removeId =
completedApps.get(completedApps.size() - completedAppsInStateStore);
RMApp removeApp = rmContext.getRMApps().get(removeId);
LOG.info("Max number of completed apps kept in state store met:"
+ " maxCompletedAppsInStateStore = " + maxCompletedAppsInStateStore
+ ", removing app " + removeApp.getApplicationId()
+ " from state store.");
rmContext.getStateStore().removeApplication(removeApp);
completedAppsInStateStore--;
}
// check apps kept in memory.
while (completedApps.size() > this.maxCompletedAppsInMemory) {
ApplicationId removeId = completedApps.remove();
LOG.info("Application should be expired, max number of completed apps"
+ " kept in memory met: maxCompletedAppsInMemory = "
+ this.maxCompletedAppsInMemory + ", removing app " + removeId
+ " from memory: ");
rmContext.getRMApps().remove(removeId);
this.applicationACLsManager.removeApplication(removeId);
}
}
@SuppressWarnings("unchecked")
protected void submitApplication(
ApplicationSubmissionContext submissionContext, long submitTime,
@ -380,8 +402,6 @@ public void recover(RMState state) throws Exception {
Map<ApplicationId, ApplicationState> appStates = state.getApplicationState();
LOG.info("Recovering " + appStates.size() + " applications");
for (ApplicationState appState : appStates.values()) {
LOG.info("Recovering application " + appState.getAppId());
submitApplication(appState.getApplicationSubmissionContext(),
appState.getSubmitTime(), appState.getUser(), true, state);
}

View File

@ -362,7 +362,7 @@ protected void serviceInit(Configuration configuration) throws Exception {
// the Exception from stateStore.init() needs to be handled for
// HA and we need to give up master status if we got fenced
LOG.error("Failed to init state store", e);
ExitUtil.terminate(1, e);
throw e;
}
rmContext.setStateStore(rmStore);
@ -470,7 +470,7 @@ protected void serviceStart() throws Exception {
// the Exception from loadState() needs to be handled for
// HA and we need to give up master status if we got fenced
LOG.error("Failed to load/recover state", e);
ExitUtil.terminate(1, e);
throw e;
}
}

View File

@ -167,7 +167,9 @@ private void loadRMAppState(RMState rmState) throws Exception {
readFile(childNodeStatus.getPath(), childNodeStatus.getLen());
if (childNodeName.startsWith(ApplicationId.appIdStrPrefix)) {
// application
LOG.info("Loading application from node: " + childNodeName);
if (LOG.isDebugEnabled()) {
LOG.debug("Loading application from node: " + childNodeName);
}
ApplicationId appId = ConverterUtils.toApplicationId(childNodeName);
ApplicationStateDataPBImpl appStateData =
new ApplicationStateDataPBImpl(
@ -185,7 +187,10 @@ private void loadRMAppState(RMState rmState) throws Exception {
} else if (childNodeName
.startsWith(ApplicationAttemptId.appAttemptIdStrPrefix)) {
// attempt
LOG.info("Loading application attempt from node: " + childNodeName);
if (LOG.isDebugEnabled()) {
LOG.debug("Loading application attempt from node: "
+ childNodeName);
}
ApplicationAttemptId attemptId =
ConverterUtils.toApplicationAttemptId(childNodeName);
ApplicationAttemptStateDataPBImpl attemptStateData =
@ -225,6 +230,7 @@ private void loadRMAppState(RMState rmState) throws Exception {
assert appState != null;
appState.attempts.put(attemptState.getAttemptId(), attemptState);
}
LOG.info("Done Loading applications from FS state store");
} catch (Exception e) {
LOG.error("Failed to load state.", e);
throw e;
@ -362,7 +368,7 @@ public synchronized void updateApplicationAttemptStateInternal(
}
@Override
public synchronized void removeApplicationState(ApplicationState appState)
public synchronized void removeApplicationStateInternal(ApplicationState appState)
throws Exception {
String appId = appState.getAppId().toString();
Path nodeRemovePath = getAppDir(rmAppRoot, appId);

View File

@ -171,8 +171,8 @@ public synchronized void updateApplicationAttemptStateInternal(
}
@Override
public synchronized void removeApplicationState(ApplicationState appState)
throws Exception {
public synchronized void removeApplicationStateInternal(
ApplicationState appState) throws Exception {
ApplicationId appId = appState.getAppId();
ApplicationState removed = state.appState.remove(appId);
if (removed == null) {

View File

@ -63,7 +63,7 @@ protected void storeApplicationAttemptStateInternal(String attemptId,
}
@Override
protected void removeApplicationState(ApplicationState appState)
protected void removeApplicationStateInternal(ApplicationState appState)
throws Exception {
// Do nothing
}

View File

@ -53,7 +53,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppNewSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRemovedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppUpdateSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
@ -519,6 +518,7 @@ protected abstract void removeRMDTMasterKeyState(DelegationKey delegationKey)
* This does not block the dispatcher threads
* There is no notification of completion for this operation.
*/
@SuppressWarnings("unchecked")
public synchronized void removeApplication(RMApp app) {
ApplicationState appState = new ApplicationState(
app.getSubmitTime(), app.getStartTime(),
@ -532,14 +532,6 @@ public synchronized void removeApplication(RMApp app) {
appState.attempts.put(attemptState.getAttemptId(), attemptState);
}
removeApplication(appState);
}
@SuppressWarnings("unchecked")
/**
* Non-Blocking API
*/
public synchronized void removeApplication(ApplicationState appState) {
dispatcher.getEventHandler().handle(new RMStateStoreRemoveAppEvent(appState));
}
@ -548,8 +540,8 @@ public synchronized void removeApplication(ApplicationState appState) {
* Derived classes must implement this method to remove the state of an
* application and its attempts
*/
protected abstract void removeApplicationState(ApplicationState appState)
throws Exception;
protected abstract void removeApplicationStateInternal(
ApplicationState appState) throws Exception;
// TODO: This should eventually become cluster-Id + "AM_RM_TOKEN_SERVICE". See
// YARN-986
@ -666,11 +658,9 @@ protected void handleStoreEvent(RMStateStoreEvent event) {
ApplicationState appState =
((RMStateStoreRemoveAppEvent) event).getAppState();
ApplicationId appId = appState.getAppId();
Exception removedException = null;
LOG.info("Removing info for app: " + appId);
try {
removeApplicationState(appState);
notifyDoneRemovingApplcation(appId, removedException);
removeApplicationStateInternal(appState);
} catch (Exception e) {
LOG.error("Error removing app: " + appId, e);
notifyStoreOperationFailed(e);
@ -738,17 +728,6 @@ private void notifyDoneUpdatingApplicationAttempt(ApplicationAttemptId attemptId
new RMAppAttemptUpdateSavedEvent(attemptId, updatedException));
}
@SuppressWarnings("unchecked")
/**
* This is to notify RMApp that this application has been removed from
* RMStateStore
*/
private void notifyDoneRemovingApplcation(ApplicationId appId,
Exception removedException) {
rmDispatcher.getEventHandler().handle(
new RMAppRemovedEvent(appId, removedException));
}
/**
* EventHandler implementation which forwards events to the FSRMStateStore
* This hides the EventHandler methods of the store from its public interface

View File

@ -392,7 +392,9 @@ private synchronized void loadRMAppState(RMState rmState) throws Exception {
byte[] childData = getDataWithRetries(childNodePath, true);
if (childNodeName.startsWith(ApplicationId.appIdStrPrefix)) {
// application
LOG.info("Loading application from znode: " + childNodeName);
if (LOG.isDebugEnabled()) {
LOG.debug("Loading application from znode: " + childNodeName);
}
ApplicationId appId = ConverterUtils.toApplicationId(childNodeName);
ApplicationStateDataPBImpl appStateData =
new ApplicationStateDataPBImpl(
@ -412,7 +414,9 @@ private synchronized void loadRMAppState(RMState rmState) throws Exception {
} else if (childNodeName
.startsWith(ApplicationAttemptId.appAttemptIdStrPrefix)) {
// attempt
LOG.info("Loading application attempt from znode: " + childNodeName);
if (LOG.isDebugEnabled()) {
LOG.debug("Loading application attempt from znode: " + childNodeName);
}
ApplicationAttemptId attemptId =
ConverterUtils.toApplicationAttemptId(childNodeName);
ApplicationAttemptStateDataPBImpl attemptStateData =
@ -456,10 +460,10 @@ private synchronized void loadRMAppState(RMState rmState) throws Exception {
LOG.info("Application node not found for attempt: "
+ attemptState.getAttemptId());
deleteWithRetries(
getNodePath(rmAppRoot, attemptState.getAttemptId().toString()),
0);
getNodePath(rmAppRoot, attemptState.getAttemptId().toString()), -1);
}
}
LOG.info("Done Loading applications from ZK state store");
}
@Override
@ -517,16 +521,16 @@ public synchronized void updateApplicationAttemptStateInternal(
}
@Override
public synchronized void removeApplicationState(ApplicationState appState)
public synchronized void removeApplicationStateInternal(ApplicationState appState)
throws Exception {
String appId = appState.getAppId().toString();
String nodeRemovePath = getNodePath(rmAppRoot, appId);
ArrayList<Op> opList = new ArrayList<Op>();
opList.add(Op.delete(nodeRemovePath, 0));
opList.add(Op.delete(nodeRemovePath, -1));
for (ApplicationAttemptId attemptId : appState.attempts.keySet()) {
String attemptRemovePath = getNodePath(rmAppRoot, attemptId.toString());
opList.add(Op.delete(attemptRemovePath, 0));
opList.add(Op.delete(attemptRemovePath, -1));
}
if (LOG.isDebugEnabled()) {
LOG.debug("Removing info for app: " + appId + " at: " + nodeRemovePath
@ -569,7 +573,7 @@ protected synchronized void storeRMDelegationTokenAndSequenceNumberState(
}
if (dtSequenceNumberPath != null) {
opList.add(Op.delete(dtSequenceNumberPath, 0));
opList.add(Op.delete(dtSequenceNumberPath, -1));
}
opList.add(Op.create(latestSequenceNumberPath, null, zkAcl,
CreateMode.PERSISTENT));
@ -587,7 +591,7 @@ protected synchronized void removeRMDelegationTokenState(
LOG.debug("Removing RMDelegationToken_"
+ rmDTIdentifier.getSequenceNumber());
}
deleteWithRetries(nodeRemovePath, 0);
deleteWithRetries(nodeRemovePath, -1);
}
@Override
@ -619,7 +623,7 @@ protected synchronized void removeRMDTMasterKeyState(
if (LOG.isDebugEnabled()) {
LOG.debug("Removing RMDelegationKey_" + delegationKey.getKeyId());
}
deleteWithRetries(nodeRemovePath, 0);
deleteWithRetries(nodeRemovePath, -1);
}
// ZK related code

View File

@ -660,32 +660,34 @@ public void transition(RMAppImpl app, RMAppEvent event) {
@SuppressWarnings("unchecked")
private static final class RMAppRecoveredTransition implements
MultipleArcTransition<RMAppImpl, RMAppEvent, RMAppState> {
@Override
public RMAppState transition(RMAppImpl app, RMAppEvent event) {
if (app.attempts.isEmpty()) {
// Saved application was not running any attempts.
app.createNewAttempt(true);
return RMAppState.SUBMITTED;
} else {
/*
* If last attempt recovered final state is null .. it means attempt
* was started but AM container may or may not have started / finished.
* Therefore we should wait for it to finish.
*/
for (RMAppAttempt attempt : app.getAppAttempts().values()) {
app.dispatcher.getEventHandler().handle(
new RMAppAttemptEvent(attempt.getAppAttemptId(),
RMAppAttemptEventType.RECOVER));
}
if (app.recoveredFinalState != null) {
FINAL_TRANSITION.transition(app, event);
return app.recoveredFinalState;
} else {
return RMAppState.RUNNING;
}
/*
* If last attempt recovered final state is null .. it means attempt was
* started but AM container may or may not have started / finished.
* Therefore we should wait for it to finish.
*/
for (RMAppAttempt attempt : app.getAppAttempts().values()) {
app.dispatcher.getEventHandler().handle(
new RMAppAttemptEvent(attempt.getAppAttemptId(),
RMAppAttemptEventType.RECOVER));
}
// The app has completed.
if (app.recoveredFinalState != null) {
FINAL_TRANSITION.transition(app, event);
return app.recoveredFinalState;
}
// No existent attempts means the attempt associated with this app was not
// started or started but not yet saved
if (app.attempts.isEmpty()) {
app.createNewAttempt(true);
return RMAppState.SUBMITTED;
}
return RMAppState.RUNNING;
}
}

View File

@ -1,36 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
import org.apache.hadoop.yarn.api.records.ApplicationId;
/**
 * An {@link RMAppEvent} of type {@code RMAppEventType.APP_REMOVED} for a
 * given application, carrying an optional exception describing the removal.
 */
public class RMAppRemovedEvent extends RMAppEvent {
// Exception associated with the removal, as supplied by the creator of the
// event. NOTE(review): presumably null when the removal succeeded -- confirm
// against the code that raises this event.
private final Exception removedException;
/**
 * @param appId id of the application this event refers to
 * @param removedException exception to attach to the event
 */
public RMAppRemovedEvent(ApplicationId appId, Exception removedException) {
super(appId, RMAppEventType.APP_REMOVED);
this.removedException = removedException;
}
/**
 * @return the exception passed to the constructor
 */
public Exception getRemovedException() {
return removedException;
}
}

View File

@ -76,14 +76,13 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFinishedAttemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAcquiredEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRejectedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
@ -675,9 +674,8 @@ public void recover(RMState state) throws Exception {
ApplicationAttemptState attemptState =
appState.getAttempt(getAppAttemptId());
assert attemptState != null;
LOG.info("Recovered attempt: AppId: "
+ getAppAttemptId().getApplicationId() + " AttemptId: "
+ getAppAttemptId() + " MasterContainer: " + masterContainer);
LOG.info("Recovering attempt: " + getAppAttemptId() + " with final state: "
+ attemptState.getState());
diagnostics.append("Attempt recovered after RM restart");
diagnostics.append(attemptState.getDiagnostics());
setMasterContainer(attemptState.getMasterContainer());
@ -856,8 +854,6 @@ private static class AttemptRecoveredTransition
@Override
public RMAppAttemptState transition(RMAppAttemptImpl appAttempt,
RMAppAttemptEvent event) {
LOG.info("Recovering attempt : recoverdFinalState :"
+ appAttempt.recoveredFinalState);
if (appAttempt.recoveredFinalState != null) {
appAttempt.progress = 1.0f;
RMApp rmApp =appAttempt.rmContext.getRMApps().get(

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate;
@ -160,4 +161,13 @@ ApplicationResourceUsageReport getAppResourceUsageReport(
*/
boolean checkAccess(UserGroupInformation callerUGI,
QueueACL acl, String queueName);
/**
* Gets the application attempts under a given queue.
*
* NOTE(review): callers visible in this change disagree on whether the
* result may be null (one checks for null, another dereferences the result
* directly); the intended contract for an unknown queue should be confirmed
* and documented here.
*
* @param queueName the name of the queue.
* @return a list of app attempt ids in the given queue.
*/
@LimitedPrivate("yarn")
@Stable
public List<ApplicationAttemptId> getAppsInQueue(String queueName);
}

View File

@ -19,12 +19,14 @@
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Stable;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.QueueACL;
@ -33,6 +35,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
@ -228,4 +231,10 @@ public void reinitialize(CSQueue newlyParsedQueue, Resource clusterResource)
*/
public void recoverContainer(Resource clusterResource, FiCaSchedulerApp application,
Container container);
/**
 * Adds the attempt ids of the applications in this queue and its subqueues
 * to the given collection.
 *
 * @param apps the collection to add the application attempt ids to; it is
 *          modified in place
 */
public void collectSchedulerApplications(Collection<ApplicationAttemptId> apps);
}

View File

@ -66,6 +66,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
@ -941,4 +942,14 @@ public synchronized boolean checkAccess(UserGroupInformation callerUGI,
return queue.hasAccess(acl, callerUGI);
}
/**
 * Looks up the named queue and gathers the attempt ids that the queue
 * reports through {@code collectSchedulerApplications}.
 *
 * @param queueName name of the queue to inspect
 * @return a newly allocated list of attempt ids, or {@code null} if no queue
 *         is registered under {@code queueName}
 */
@Override
public List<ApplicationAttemptId> getAppsInQueue(String queueName) {
  final CSQueue target = queues.get(queueName);
  if (target != null) {
    final List<ApplicationAttemptId> attemptIds =
        new ArrayList<ApplicationAttemptId>();
    target.collectSchedulerApplications(attemptIds);
    return attemptIds;
  }
  // Unknown queue: reported as null rather than as an empty list.
  return null;
}
}

View File

@ -20,6 +20,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
@ -58,6 +59,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
@ -1622,4 +1624,12 @@ public Resource getTotalResourcePending() {
return ret;
}
/**
 * Appends the attempt id of every application in {@code activeApplications}
 * to the supplied collection.
 * NOTE(review): only the active set is visited here - confirm whether
 * applications that are not yet active should be reported as well.
 *
 * @param apps collection that receives the attempt ids
 */
@Override
public void collectSchedulerApplications(
    Collection<ApplicationAttemptId> apps) {
  for (FiCaSchedulerApp activeApp : activeApplications) {
    ApplicationAttemptId attemptId = activeApp.getApplicationAttemptId();
    apps.add(attemptId);
  }
}
}

View File

@ -36,6 +36,7 @@
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.AccessControlList;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.QueueACL;
@ -50,6 +51,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
@ -764,4 +766,12 @@ public void recoverContainer(Resource clusterResource,
parent.recoverContainer(clusterResource, application, container);
}
}
/**
 * Delegates to every child queue in turn so that each one adds its own
 * application attempt ids to the supplied collection.
 *
 * @param apps collection that receives the attempt ids
 */
@Override
public void collectSchedulerApplications(
    Collection<ApplicationAttemptId> apps) {
  for (CSQueue child : childQueues) {
    child.collectSchedulerApplications(apps);
  }
}
}

View File

@ -29,11 +29,13 @@
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.QueueACL;
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
@Private
@Unstable
@ -105,6 +107,17 @@ public Collection<AppSchedulable> getRunnableAppSchedulables() {
public List<AppSchedulable> getNonRunnableAppSchedulables() {
return nonRunnableAppScheds;
}
/**
 * Adds the attempt ids of this leaf queue's applications to the supplied
 * collection: first those in {@code runnableAppScheds}, then those in
 * {@code nonRunnableAppScheds}.
 *
 * @param apps collection that receives the attempt ids
 */
@Override
public void collectSchedulerApplications(
    Collection<ApplicationAttemptId> apps) {
  for (AppSchedulable runnable : runnableAppScheds) {
    ApplicationAttemptId attemptId =
        runnable.getApp().getApplicationAttemptId();
    apps.add(attemptId);
  }
  for (AppSchedulable nonRunnable : nonRunnableAppScheds) {
    ApplicationAttemptId attemptId =
        nonRunnable.getApp().getApplicationAttemptId();
    apps.add(attemptId);
  }
}
@Override
public void setPolicy(SchedulingPolicy policy)

View File

@ -28,10 +28,12 @@
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.QueueACL;
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
@Private
@Unstable
@ -184,4 +186,12 @@ public void decrementRunnableApps() {
public int getNumRunnableApps() {
return runnableApps;
}
/**
 * Asks each child queue to add its application attempt ids to the supplied
 * collection.
 *
 * @param apps collection that receives the attempt ids
 */
@Override
public void collectSchedulerApplications(
    Collection<ApplicationAttemptId> apps) {
  for (FSQueue child : childQueues) {
    child.collectSchedulerApplications(apps);
  }
}
}

View File

@ -24,6 +24,7 @@
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.QueueACL;
import org.apache.hadoop.yarn.api.records.QueueInfo;
@ -158,7 +159,14 @@ public boolean hasAccess(QueueACL acl, UserGroupInformation user) {
* Gets the children of this queue, if any.
*/
public abstract Collection<FSQueue> getChildQueues();
/**
 * Adds the attempt ids of all applications in the queue and its subqueues
 * to the given collection.
 *
 * @param apps the collection to add the application attempt ids to; it is
 *          modified in place
 */
public abstract void collectSchedulerApplications(
Collection<ApplicationAttemptId> apps);
/**
* Return the number of apps for which containers can be allocated.
* Includes apps in subqueues.

View File

@ -72,6 +72,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
@ -1267,4 +1268,15 @@ public void onReload(AllocationConfiguration queueInfo) {
}
}
/**
 * Resolves the named queue through the queue manager and gathers the
 * attempt ids that the queue reports through
 * {@code collectSchedulerApplications}.
 *
 * @param queueName name of the queue to inspect
 * @return a newly allocated list of attempt ids, or {@code null} if the
 *         queue manager has no queue called {@code queueName}
 */
@Override
public List<ApplicationAttemptId> getAppsInQueue(String queueName) {
  final FSQueue target = queueMgr.getQueue(queueName);
  if (target != null) {
    final List<ApplicationAttemptId> attemptIds =
        new ArrayList<ApplicationAttemptId>();
    target.collectSchedulerApplications(attemptIds);
    return attemptIds;
  }
  // Unknown queue: reported as null rather than as an empty list.
  return null;
}
}

View File

@ -53,6 +53,11 @@ public class FairSchedulerConfiguration extends Configuration {
public static final String ALLOCATION_FILE = CONF_PREFIX + "allocation.file";
protected static final String DEFAULT_ALLOCATION_FILE = "fair-scheduler.xml";
/** Whether to enable the Fair Scheduler event log */
public static final String EVENT_LOG_ENABLED = CONF_PREFIX + "event-log-enabled";
public static final boolean DEFAULT_EVENT_LOG_ENABLED = false;
protected static final String EVENT_LOG_DIR = "eventlog.dir";
/** Whether pools can be created that were not specified in the FS configuration file
@ -192,6 +197,10 @@ public boolean getSizeBasedWeight() {
return getBoolean(SIZE_BASED_WEIGHT, DEFAULT_SIZE_BASED_WEIGHT);
}
/**
 * Tells whether the Fair Scheduler event log is switched on.
 *
 * @return the boolean read from {@code EVENT_LOG_ENABLED}, with
 *         {@code DEFAULT_EVENT_LOG_ENABLED} passed as the fallback
 */
public boolean isEventLogEnabled() {
  final boolean enabled =
      getBoolean(EVENT_LOG_ENABLED, DEFAULT_EVENT_LOG_ENABLED);
  return enabled;
}
public String getEventlogDir() {
return get(EVENT_LOG_DIR, new File(System.getProperty("hadoop.log.dir",
"/tmp/")).getAbsolutePath() + File.separator + "fairscheduler");

View File

@ -75,26 +75,30 @@ class FairSchedulerEventLog {
private DailyRollingFileAppender appender;
boolean init(FairSchedulerConfiguration conf) {
try {
logDir = conf.getEventlogDir();
File logDirFile = new File(logDir);
if (!logDirFile.exists()) {
if (!logDirFile.mkdirs()) {
throw new IOException(
"Mkdirs failed to create " + logDirFile.toString());
if (conf.isEventLogEnabled()) {
try {
logDir = conf.getEventlogDir();
File logDirFile = new File(logDir);
if (!logDirFile.exists()) {
if (!logDirFile.mkdirs()) {
throw new IOException(
"Mkdirs failed to create " + logDirFile.toString());
}
}
String username = System.getProperty("user.name");
logFile = String.format("%s%shadoop-%s-fairscheduler.log",
logDir, File.separator, username);
logDisabled = false;
PatternLayout layout = new PatternLayout("%d{ISO8601}\t%m%n");
appender = new DailyRollingFileAppender(layout, logFile, "'.'yyyy-MM-dd");
appender.activateOptions();
LOG.info("Initialized fair scheduler event log, logging to " + logFile);
} catch (IOException e) {
LOG.error(
"Failed to initialize fair scheduler event log. Disabling it.", e);
logDisabled = true;
}
String username = System.getProperty("user.name");
logFile = String.format("%s%shadoop-%s-fairscheduler.log",
logDir, File.separator, username);
logDisabled = false;
PatternLayout layout = new PatternLayout("%d{ISO8601}\t%m%n");
appender = new DailyRollingFileAppender(layout, logFile, "'.'yyyy-MM-dd");
appender.activateOptions();
LOG.info("Initialized fair scheduler event log, logging to " + logFile);
} catch (IOException e) {
LOG.error(
"Failed to initialize fair scheduler event log. Disabling it.", e);
} else {
logDisabled = true;
}
return !(logDisabled);

View File

@ -21,6 +21,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@ -850,5 +851,19 @@ public synchronized boolean checkAccess(UserGroupInformation callerUGI,
QueueACL acl, String queueName) {
return DEFAULT_QUEUE.hasAccess(acl, callerUGI);
}
/**
 * Returns the attempt ids of all applications known to this scheduler when
 * asked about the single default queue.
 *
 * @param queueName name of the queue to inspect
 * @return a newly allocated list with one attempt id per entry in
 *         {@code applications} if {@code queueName} names the default queue;
 *         {@code null} for any other name, including a {@code null} name
 */
@Override
public synchronized List<ApplicationAttemptId> getAppsInQueue(String queueName) {
  // Compare from the DEFAULT_QUEUE side so that a null queueName is treated
  // as "no such queue" instead of raising a NullPointerException.
  if (!DEFAULT_QUEUE.getQueueName().equals(queueName)) {
    return null;
  }
  List<ApplicationAttemptId> apps = new ArrayList<ApplicationAttemptId>(
      applications.size());
  for (FiCaSchedulerApp app : applications.values()) {
    apps.add(app.getApplicationAttemptId());
  }
  return apps;
}
}

View File

@ -19,8 +19,12 @@
package org.apache.hadoop.yarn.server.resourcemanager;
import static org.mockito.Matchers.isA;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import static org.mockito.Mockito.times;
import java.util.HashMap;
import java.util.List;
@ -43,6 +47,7 @@
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
@ -99,7 +104,7 @@ public static RMContext mockRMContext(int n, long time) {
rmDispatcher);
AMLivelinessMonitor amFinishingMonitor = new AMLivelinessMonitor(
rmDispatcher);
return new RMContextImpl(rmDispatcher,
RMContext context = new RMContextImpl(rmDispatcher,
containerAllocationExpirer, amLivelinessMonitor, amFinishingMonitor,
null, null, null, null, null) {
@Override
@ -107,6 +112,8 @@ public ConcurrentMap<ApplicationId, RMApp> getRMApps() {
return map;
}
};
((RMContextImpl)context).setStateStore(mock(RMStateStore.class));
return context;
}
public class TestAppManagerDispatcher implements
@ -142,7 +149,6 @@ public class TestRMAppManager extends RMAppManager {
public TestRMAppManager(RMContext context, Configuration conf) {
super(context, null, null, new ApplicationACLsManager(conf), conf);
setCompletedAppsMax(YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
}
public TestRMAppManager(RMContext context,
@ -150,7 +156,6 @@ public TestRMAppManager(RMContext context,
YarnScheduler scheduler, ApplicationMasterService masterService,
ApplicationACLsManager applicationACLsManager, Configuration conf) {
super(context, scheduler, masterService, applicationACLsManager, conf);
setCompletedAppsMax(YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
}
public void checkAppNumCompletedLimit() {
@ -164,9 +169,8 @@ public void finishApplication(ApplicationId appId) {
public int getCompletedAppsListSize() {
return super.getCompletedAppsListSize();
}
public void setCompletedAppsMax(int max) {
super.setCompletedAppsMax(max);
public int getCompletedAppsInStateStore() {
return this.completedAppsInStateStore;
}
public void submitApplication(
ApplicationSubmissionContext submissionContext, String user)
@ -227,9 +231,9 @@ public void testRMAppRetireNone() throws Exception {
// Create such that none of the applications will retire since
// haven't hit max #
RMContext rmContext = mockRMContext(10, now - 10);
TestRMAppManager appMonitor = new TestRMAppManager(rmContext, new Configuration());
appMonitor.setCompletedAppsMax(10);
Configuration conf = new YarnConfiguration();
conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, 10);
TestRMAppManager appMonitor = new TestRMAppManager(rmContext,conf);
Assert.assertEquals("Number of apps incorrect before checkAppTimeLimit",
10, rmContext.getRMApps().size());
@ -243,6 +247,8 @@ public void testRMAppRetireNone() throws Exception {
rmContext.getRMApps().size());
Assert.assertEquals("Number of completed apps incorrect after check", 10,
appMonitor.getCompletedAppsListSize());
verify(rmContext.getStateStore(), never()).removeApplication(
isA(RMApp.class));
}
@Test
@ -250,9 +256,10 @@ public void testRMAppRetireSome() throws Exception {
long now = System.currentTimeMillis();
RMContext rmContext = mockRMContext(10, now - 20000);
TestRMAppManager appMonitor = new TestRMAppManager(rmContext, new Configuration());
appMonitor.setCompletedAppsMax(3);
Configuration conf = new YarnConfiguration();
conf.setInt(YarnConfiguration.RM_STATE_STORE_MAX_COMPLETED_APPLICATIONS, 3);
conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, 3);
TestRMAppManager appMonitor = new TestRMAppManager(rmContext, conf);
Assert.assertEquals("Number of apps incorrect before", 10, rmContext
.getRMApps().size());
@ -266,6 +273,8 @@ public void testRMAppRetireSome() throws Exception {
rmContext.getRMApps().size());
Assert.assertEquals("Number of completed apps incorrect after check", 3,
appMonitor.getCompletedAppsListSize());
verify(rmContext.getStateStore(), times(7)).removeApplication(
isA(RMApp.class));
}
@Test
@ -274,14 +283,17 @@ public void testRMAppRetireSomeDifferentStates() throws Exception {
// these parameters don't matter, override applications below
RMContext rmContext = mockRMContext(10, now - 20000);
TestRMAppManager appMonitor = new TestRMAppManager(rmContext, new Configuration());
Configuration conf = new YarnConfiguration();
conf.setInt(YarnConfiguration.RM_STATE_STORE_MAX_COMPLETED_APPLICATIONS, 2);
conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, 2);
appMonitor.setCompletedAppsMax(2);
TestRMAppManager appMonitor = new TestRMAppManager(rmContext, conf);
// clear out applications map
rmContext.getRMApps().clear();
Assert.assertEquals("map isn't empty", 0, rmContext.getRMApps().size());
// 6 applications are in final state, 4 are not in final state.
// / set with various finished states
RMApp app = new MockRMApp(0, now - 20000, RMAppState.KILLED);
rmContext.getRMApps().put(app.getApplicationId(), app);
@ -318,7 +330,9 @@ public void testRMAppRetireSomeDifferentStates() throws Exception {
rmContext.getRMApps().size());
Assert.assertEquals("Number of completed apps incorrect after check", 2,
appMonitor.getCompletedAppsListSize());
// 6 applications in final state, 4 of them are removed
verify(rmContext.getStateStore(), times(4)).removeApplication(
isA(RMApp.class));
}
@Test
@ -342,14 +356,13 @@ public void testRMAppRetireZeroSetting() throws Exception {
long now = System.currentTimeMillis();
RMContext rmContext = mockRMContext(10, now - 20000);
TestRMAppManager appMonitor = new TestRMAppManager(rmContext, new Configuration());
Configuration conf = new YarnConfiguration();
conf.setInt(YarnConfiguration.RM_STATE_STORE_MAX_COMPLETED_APPLICATIONS, 0);
conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, 0);
TestRMAppManager appMonitor = new TestRMAppManager(rmContext, conf);
Assert.assertEquals("Number of apps incorrect before", 10, rmContext
.getRMApps().size());
// test with 0
appMonitor.setCompletedAppsMax(0);
addToCompletedApps(appMonitor, rmContext);
Assert.assertEquals("Number of completed apps incorrect", 10,
appMonitor.getCompletedAppsListSize());
@ -360,6 +373,64 @@ public void testRMAppRetireZeroSetting() throws Exception {
rmContext.getRMApps().size());
Assert.assertEquals("Number of completed apps incorrect after check", 0,
appMonitor.getCompletedAppsListSize());
verify(rmContext.getStateStore(), times(10)).removeApplication(
isA(RMApp.class));
}
/**
 * Configures a state-store limit (4) below the in-memory limit (8) and
 * checks, for 10 completed apps, how many remain in memory and how many
 * removals are issued against the state store.
 */
@Test
public void testStateStoreAppLimitLessThanMemoryAppLimit() {
  final int totalApps = 10;
  final int memoryLimit = 8;
  final int stateStoreLimit = 4;

  long now = System.currentTimeMillis();
  RMContext rmContext = mockRMContext(totalApps, now - 20000);

  Configuration conf = new YarnConfiguration();
  conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, memoryLimit);
  conf.setInt(YarnConfiguration.RM_STATE_STORE_MAX_COMPLETED_APPLICATIONS,
      stateStoreLimit);
  TestRMAppManager appMonitor = new TestRMAppManager(rmContext, conf);

  addToCompletedApps(appMonitor, rmContext);
  Assert.assertEquals("Number of completed apps incorrect", totalApps,
      appMonitor.getCompletedAppsListSize());

  appMonitor.checkAppNumCompletedLimit();

  // In-memory bookkeeping is trimmed down to the memory limit.
  Assert.assertEquals("Number of apps incorrect after # completed check",
      memoryLimit, rmContext.getRMApps().size());
  Assert.assertEquals("Number of completed apps incorrect after check",
      memoryLimit, appMonitor.getCompletedAppsListSize());

  // The state store is trimmed down to its own, smaller limit.
  int expectedStoreRemovals = totalApps - stateStoreLimit;
  verify(rmContext.getStateStore(), times(expectedStoreRemovals))
      .removeApplication(isA(RMApp.class));
  Assert.assertEquals(stateStoreLimit,
      appMonitor.getCompletedAppsInStateStore());
}
/**
 * Configures a state-store limit (1000) above the in-memory limit (8) and
 * checks, for 10 completed apps, that both the in-memory list and the state
 * store end up holding the in-memory limit.
 */
@Test
public void testStateStoreAppLimitLargerThanMemoryAppLimit() {
  final int totalApps = 10;
  final int memoryLimit = 8;

  long now = System.currentTimeMillis();
  RMContext rmContext = mockRMContext(totalApps, now - 20000);

  Configuration conf = new YarnConfiguration();
  conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, memoryLimit);
  // larger than maxCompletedAppsInMemory, reset to RM_MAX_COMPLETED_APPLICATIONS.
  conf.setInt(YarnConfiguration.RM_STATE_STORE_MAX_COMPLETED_APPLICATIONS, 1000);
  TestRMAppManager appMonitor = new TestRMAppManager(rmContext, conf);

  addToCompletedApps(appMonitor, rmContext);
  Assert.assertEquals("Number of completed apps incorrect", totalApps,
      appMonitor.getCompletedAppsListSize());

  appMonitor.checkAppNumCompletedLimit();

  int expectedRemovals = totalApps - memoryLimit;
  Assert.assertEquals("Number of apps incorrect after # completed check",
      memoryLimit, rmContext.getRMApps().size());
  Assert.assertEquals("Number of completed apps incorrect after check",
      memoryLimit, appMonitor.getCompletedAppsListSize());
  verify(rmContext.getStateStore(), times(expectedRemovals)).removeApplication(
      isA(RMApp.class));
  Assert.assertEquals(memoryLimit,
      appMonitor.getCompletedAppsInStateStore());
}
protected void setupDispatcher(RMContext rmContext, Configuration conf) {

View File

@ -30,9 +30,12 @@
import java.io.IOException;
import java.net.InetSocketAddress;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.ConcurrentHashMap;
@ -106,6 +109,9 @@ public class TestClientRMService {
private static RMDelegationTokenSecretManager dtsm;
private final static String QUEUE_1 = "Q-1";
private final static String QUEUE_2 = "Q-2";
@BeforeClass
public static void setupSecretManager() throws IOException {
RMContext rmContext = mock(RMContext.class);
@ -438,7 +444,7 @@ public void handle(Event event) {}
mockAclsManager, mockQueueACLsManager, null);
// Initialize appnames and queues
String[] queues = {"Q-1", "Q-2"};
String[] queues = {QUEUE_1, QUEUE_2};
String[] appNames =
{MockApps.newAppName(), MockApps.newAppName(), MockApps.newAppName()};
ApplicationId[] appIds =
@ -596,6 +602,8 @@ private void mockRMContext(YarnScheduler yarnScheduler, RMContext rmContext)
ConcurrentHashMap<ApplicationId, RMApp> apps = getRMApps(rmContext,
yarnScheduler);
when(rmContext.getRMApps()).thenReturn(apps);
when(yarnScheduler.getAppsInQueue(eq("testqueue"))).thenReturn(
getSchedulerApps(apps));
}
private ConcurrentHashMap<ApplicationId, RMApp> getRMApps(
@ -614,10 +622,23 @@ private ConcurrentHashMap<ApplicationId, RMApp> getRMApps(
config, "testqueue"));
return apps;
}
/**
 * Builds the list of attempt ids the mocked scheduler reports for the test
 * queue. The {@code apps} argument is not consulted; the ids are fixed.
 *
 * @param apps the RM application map (unused)
 * @return a mutable list holding attempt 0 of applications 1 and 3
 */
private List<ApplicationAttemptId> getSchedulerApps(
    Map<ApplicationId, RMApp> apps) {
  // Return app IDs for the apps in testqueue (as defined in getRMApps)
  final ApplicationAttemptId first =
      ApplicationAttemptId.newInstance(getApplicationId(1), 0);
  final ApplicationAttemptId second =
      ApplicationAttemptId.newInstance(getApplicationId(3), 0);
  List<ApplicationAttemptId> attemptIds = new ArrayList<ApplicationAttemptId>();
  attemptIds.add(first);
  attemptIds.add(second);
  return attemptIds;
}
private ApplicationId getApplicationId(int id) {
private static ApplicationId getApplicationId(int id) {
return ApplicationId.newInstance(123456, id);
}
/**
 * Creates the attempt id for attempt number 1 of the test application with
 * the given numeric id.
 *
 * @param id numeric id handed to {@code getApplicationId}
 * @return the attempt id built from that application id and attempt 1
 */
private static ApplicationAttemptId getApplicationAttemptId(int id) {
  final ApplicationId appId = getApplicationId(id);
  return ApplicationAttemptId.newInstance(appId, 1);
}
private RMAppImpl getRMApp(RMContext rmContext, YarnScheduler yarnScheduler,
ApplicationId applicationId3, YarnConfiguration config, String queueName) {
@ -641,6 +662,10 @@ private static YarnScheduler mockYarnScheduler() {
when(yarnScheduler.getMaximumResourceCapability()).thenReturn(
Resources.createResource(
YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB));
when(yarnScheduler.getAppsInQueue(QUEUE_1)).thenReturn(
Arrays.asList(getApplicationAttemptId(101), getApplicationAttemptId(102)));
when(yarnScheduler.getAppsInQueue(QUEUE_2)).thenReturn(
Arrays.asList(getApplicationAttemptId(103)));
return yarnScheduler;
}
}

View File

@ -34,7 +34,6 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
@ -46,7 +45,7 @@
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.delegation.DelegationKey;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.service.Service.STATE;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
@ -68,9 +67,6 @@
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
@ -80,6 +76,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationAttemptStateDataPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
@ -93,7 +90,6 @@
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mortbay.log.Log;
public class TestRMRestart {
@ -106,7 +102,6 @@ public class TestRMRestart {
public void setup() throws UnknownHostException {
Logger rootLogger = LogManager.getRootLogger();
rootLogger.setLevel(Level.DEBUG);
ExitUtil.disableSystemExit();
conf = new YarnConfiguration();
UserGroupInformation.setConfiguration(conf);
conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
@ -423,6 +418,8 @@ public void testRMRestartAppRunningAMFailed() throws Exception {
rm2.getRMContext().getRMApps().get(app0.getApplicationId());
Assert.assertEquals(RMAppAttemptState.FAILED, recoveredApp
.getAppAttempts().get(am0.getApplicationAttemptId()).getAppAttemptState());
rm1.stop();
rm2.stop();
}
@Test
@ -629,6 +626,8 @@ public void testRMRestartFailedApp() throws Exception {
.contains("Failing the application."));
// failed diagnostics from attempt is lost because the diagnostics from
// attempt is not yet available by the time app is saving the app state.
rm1.stop();
rm2.stop();
}
@Test
@ -675,6 +674,48 @@ public void testRMRestartKilledApp() throws Exception{
ApplicationReport appReport = verifyAppReportAfterRMRestart(app0, rm2);
Assert.assertEquals(app0.getDiagnostics().toString(),
appReport.getDiagnostics());
rm1.stop();
rm2.stop();
}
/**
 * Restarts the RM for an application that was killed while the state store
 * silently dropped every attempt save/update, and checks that the
 * application is recovered as KILLED with no attempts.
 */
@Test
public void testRMRestartKilledAppWithNoAttempts() throws Exception {
  MemoryRMStateStore memStore = new MemoryRMStateStore() {
    @Override
    public synchronized void storeApplicationAttemptStateInternal(
        String attemptIdStr,
        ApplicationAttemptStateDataPBImpl attemptStateData) throws Exception {
      // ignore attempt saving request.
    }

    @Override
    public synchronized void updateApplicationAttemptStateInternal(
        String attemptIdStr,
        ApplicationAttemptStateDataPBImpl attemptStateData) throws Exception {
      // ignore attempt saving request.
    }
  };
  memStore.init(conf);

  // start RM
  MockRM rm1 = new MockRM(conf, memStore);
  rm1.start();
  // create app
  RMApp app0 =
      rm1.submitApp(200, "name", "user",
        new HashMap<ApplicationAccessType, String>(), false, "default", -1,
        null, "MAPREDUCE", false);
  // kill the app.
  rm1.killApp(app0.getApplicationId());
  rm1.waitForState(app0.getApplicationId(), RMAppState.KILLED);

  // restart rm
  MockRM rm2 = new MockRM(conf, memStore);
  rm2.start();
  RMApp loadedApp0 =
      rm2.getRMContext().getRMApps().get(app0.getApplicationId());
  rm2.waitForState(loadedApp0.getApplicationId(), RMAppState.KILLED);
  // assertEquals reports the actual attempt count when the check fails.
  Assert.assertEquals(0, loadedApp0.getAppAttempts().size());

  // Stop both RMs, as the other restart tests in this class do, so their
  // services do not outlive the test.
  rm1.stop();
  rm2.stop();
}
@Test
@ -724,6 +765,9 @@ public void testRMRestartSucceededApp() throws Exception {
Assert.assertEquals(FinalApplicationStatus.SUCCEEDED,
appReport.getFinalApplicationStatus());
Assert.assertEquals("trackingUrl", appReport.getOriginalTrackingUrl());
rm1.stop();
rm2.stop();
}
@Test
@ -817,6 +861,9 @@ protected RMAppManager createRMAppManager() {
// check application summary is logged for the completed apps after RM restart.
verify(rm2.getRMAppManager(), times(3)).logApplicationSummary(
isA(ApplicationId.class));
rm1.stop();
rm2.stop();
}
private MockAM launchAM(RMApp app, MockRM rm, MockNM nm)
@ -1378,6 +1425,75 @@ protected void handleStoreEvent(RMStateStoreEvent event) {
Assert.assertTrue(rmAppState.size() == NUM_APPS);
}
/**
 * With RM_MAX_COMPLETED_APPLICATIONS set to 1, checks that an application
 * recovered as FINISHED after an RM restart is dropped from both the
 * RMContext and the state store once a second application finishes on the
 * restarted RM.
 */
@Test
public void testFinishedAppRemovalAfterRMRestart() throws Exception {
MemoryRMStateStore memStore = new MemoryRMStateStore();
conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, 1);
memStore.init(conf);
RMState rmState = memStore.getState();
// start RM
MockRM rm1 = new MockRM(conf, memStore);
rm1.start();
MockNM nm1 =
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
nm1.registerNode();
// create an app and finish the app.
RMApp app0 = rm1.submitApp(200);
MockAM am0 = launchAM(app0, rm1, nm1);
finishApplicationMaster(app0, rm1, nm1, am0);
// restart the RM on top of the same state store and re-register the node
MockRM rm2 = new MockRM(conf, memStore);
rm2.start();
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
nm1 = rm2.registerNode("127.0.0.1:1234", 15120);
Map<ApplicationId, ApplicationState> rmAppState =
rmState.getApplicationState();
// app0 exists in both the state store and the rmContext
Assert.assertEquals(RMAppState.FINISHED,
rmAppState.get(app0.getApplicationId()).getState());
rm2.waitForState(app0.getApplicationId(), RMAppState.FINISHED);
// create one more app and finish the app.
RMApp app1 = rm2.submitApp(200);
MockAM am1 = launchAM(app1, rm2, nm1);
finishApplicationMaster(app1, rm2, nm1, am1);
// app0 gets kicked out of both the rmContext and the state store
Assert.assertNull(rm2.getRMContext().getRMApps()
.get(app0.getApplicationId()));
Assert.assertNull(rmAppState.get(app0.getApplicationId()));
rm1.stop();
rm2.stop();
}
/**
 * Checks that the RM does not hang on shutdown: the state store rejects its
 * version check, so starting the RM must fail with that error and leave the
 * RM in the STOPPED state, all within the test timeout.
 */
@Test (timeout = 10000)
public void testRMShutdown() throws Exception {
  MemoryRMStateStore memStore = new MemoryRMStateStore() {
    @Override
    public synchronized void checkVersion()
        throws Exception {
      throw new Exception("Invalid version.");
    }
  };
  // start RM
  memStore.init(conf);
  MockRM rm1 = null;
  try {
    rm1 = new MockRM(conf, memStore);
    rm1.start();
    Assert.fail("RM start should have failed on the state store version check");
  } catch (Exception e) {
    Assert.assertTrue(e.getMessage().contains("Invalid version."));
  }
  // Guard against the constructor itself having thrown, which would
  // otherwise surface as an uninformative NullPointerException below.
  Assert.assertNotNull("MockRM was never constructed", rm1);
  // assertEquals reports the actual service state when the check fails.
  Assert.assertEquals(STATE.STOPPED, rm1.getServiceState());
}
public static class TestSecurityMockRM extends MockRM {
public TestSecurityMockRM(Configuration conf, RMStateStore store) {

View File

@ -26,6 +26,7 @@
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
@ -109,6 +110,7 @@ interface RMStateStoreHelper {
boolean isFinalStateValid() throws Exception;
void writeVersion(RMStateVersion version) throws Exception;
RMStateVersion getCurrentVersion() throws Exception;
boolean appExists(RMApp app) throws Exception;
}
void waitNotify(TestDispatcher dispatcher) {
@ -128,7 +130,7 @@ void waitNotify(TestDispatcher dispatcher) {
dispatcher.notified = false;
}
void storeApp(RMStateStore store, ApplicationId appId, long submitTime,
RMApp storeApp(RMStateStore store, ApplicationId appId, long submitTime,
long startTime) throws Exception {
ApplicationSubmissionContext context =
new ApplicationSubmissionContextPBImpl();
@ -141,6 +143,7 @@ void storeApp(RMStateStore store, ApplicationId appId, long submitTime,
when(mockApp.getApplicationSubmissionContext()).thenReturn(context);
when(mockApp.getUser()).thenReturn("test");
store.storeNewApplication(mockApp);
return mockApp;
}
ContainerId storeAttempt(RMStateStore store, ApplicationAttemptId attemptId,
@ -370,6 +373,7 @@ public void testRMDTSecretManagerStateStore(
Assert.assertEquals(keySet, secretManagerState.getMasterKeyState());
Assert.assertEquals(sequenceNumber,
secretManagerState.getDTSequenceNumber());
store.close();
}
private Token<AMRMTokenIdentifier> generateAMRMToken(
@ -415,4 +419,43 @@ public void testCheckVersion(RMStateStoreHelper stateStoreHelper)
Assert.assertTrue(t instanceof RMStateVersionIncompatibleException);
}
}
/**
 * Stores several applications, waits until the helper reports each of them
 * as existing, then removes them one at a time and waits until the helper
 * reports each one as gone. The polling loops have no bound of their own.
 */
public void testAppDeletion(RMStateStoreHelper stateStoreHelper)
    throws Exception {
  RMStateStore store = stateStoreHelper.getRMStateStore();
  store.setRMDispatcher(new TestDispatcher());

  // create and store apps
  int appCount = 5;
  ArrayList<RMApp> storedApps = new ArrayList<RMApp>();
  for (int seq = 0; seq < appCount; seq++) {
    ApplicationId id = ApplicationId.newInstance(1383183338, seq);
    storedApps.add(storeApp(store, id, 123456789, 987654321));
  }
  Assert.assertEquals(appCount, storedApps.size());

  // Poll until every app is visible in the store.
  for (RMApp stored : storedApps) {
    while (!stateStoreHelper.appExists(stored)) {
      Thread.sleep(100);
    }
  }

  // Remove the apps one by one, polling until each removal is visible.
  for (RMApp stored : storedApps) {
    store.removeApplication(stored);
    while (stateStoreHelper.appExists(stored)) {
      Thread.sleep(100);
    }
  }
}
}

View File

@ -20,6 +20,7 @@
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import junit.framework.Assert;
@ -38,6 +39,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.RMStateVersion;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.RMStateVersionPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.junit.Test;
@ -69,6 +71,13 @@ public Path getVersionNode() {
public RMStateVersion getCurrentVersion() {
return CURRENT_VERSION_INFO;
}
/**
 * Resolves {@code appId} under
 * {@code workingDirPathURI/ROOT_DIR_NAME/RM_APP_ROOT}.
 */
public Path getAppDir(String appId) {
  Path storeRoot = new Path(workingDirPathURI, ROOT_DIR_NAME);
  return new Path(new Path(storeRoot, RM_APP_ROOT), appId);
}
}
public TestFSRMStateStoreTester(MiniDFSCluster cluster) throws Exception {
@ -109,9 +118,16 @@ public void writeVersion(RMStateVersion version) throws Exception {
public RMStateVersion getCurrentVersion() throws Exception {
return store.getCurrentVersion();
}
/**
 * Reports whether the directory returned by the store's
 * {@code getAppDir} for this app exists on the cluster file system.
 */
public boolean appExists(RMApp app) throws IOException {
  FileSystem clusterFs = cluster.getFileSystem();
  return clusterFs.exists(
      store.getAppDir(app.getApplicationId().toString()));
}
}
@Test
@Test(timeout = 60000)
public void testFSRMStateStore() throws Exception {
HdfsConfiguration conf = new HdfsConfiguration();
MiniDFSCluster cluster =
@ -126,11 +142,8 @@ public void testFSRMStateStore() throws Exception {
String appAttemptIdStr3 = "appattempt_1352994193343_0001_000003";
ApplicationAttemptId attemptId3 =
ConverterUtils.toApplicationAttemptId(appAttemptIdStr3);
Path rootDir =
new Path(fileSystemRMStateStore.fsWorkingPath, "FSRMStateRoot");
Path appRootDir = new Path(rootDir, "RMAppRoot");
Path appDir =
new Path(appRootDir, attemptId3.getApplicationId().toString());
fsTester.store.getAppDir(attemptId3.getApplicationId().toString());
Path tempAppAttemptFile =
new Path(appDir, attemptId3.toString() + ".tmp");
fsOut = fileSystemRMStateStore.fs.create(tempAppAttemptFile, false);
@ -138,10 +151,11 @@ public void testFSRMStateStore() throws Exception {
fsOut.close();
testRMAppStateStore(fsTester);
Assert.assertFalse(fileSystemRMStateStore.fsWorkingPath
Assert.assertFalse(fsTester.workingDirPathURI
.getFileSystem(conf).exists(tempAppAttemptFile));
testRMDTSecretManagerStateStore(fsTester);
testCheckVersion(fsTester);
testAppDeletion(fsTester);
} finally {
cluster.shutdown();
}

View File

@ -46,7 +46,9 @@
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.RMStateVersion;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.RMStateVersionPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.data.Stat;
import org.junit.Test;
public class TestZKRMStateStore extends RMStateStoreTestBase {
@ -57,6 +59,7 @@ class TestZKRMStateStoreTester implements RMStateStoreHelper {
ZooKeeper client;
TestZKRMStateStoreInternal store;
String workingZnode;
class TestZKRMStateStoreInternal extends ZKRMStateStore {
@ -79,11 +82,16 @@ public String getVersionNode() {
public RMStateVersion getCurrentVersion() {
return CURRENT_VERSION_INFO;
}
/**
 * Joins {@code workingZnode}, {@code ROOT_ZNODE_NAME}, {@code RM_APP_ROOT}
 * and {@code appId} with "/" separators.
 */
public String getAppNode(String appId) {
  StringBuilder node = new StringBuilder();
  node.append(workingZnode).append("/").append(ROOT_ZNODE_NAME);
  node.append("/").append(RM_APP_ROOT);
  node.append("/").append(appId);
  return node.toString();
}
}
public RMStateStore getRMStateStore() throws Exception {
String workingZnode = "/Test";
Configuration conf = new YarnConfiguration();
YarnConfiguration conf = new YarnConfiguration();
workingZnode = "/Test";
conf.set(YarnConfiguration.ZK_RM_STATE_STORE_ADDRESS, hostPort);
conf.set(YarnConfiguration.ZK_RM_STATE_STORE_PARENT_PATH, workingZnode);
this.client = createClient();
@ -107,14 +115,22 @@ public void writeVersion(RMStateVersion version) throws Exception {
public RMStateVersion getCurrentVersion() throws Exception {
return store.getCurrentVersion();
}
/**
 * Reports whether the client finds a znode at the store's
 * {@code getAppNode} path for this app (no watch is set).
 */
public boolean appExists(RMApp app) throws Exception {
  String appNode = store.getAppNode(app.getApplicationId().toString());
  return client.exists(appNode, false) != null;
}
}
/**
 * Runs the app-state, secret-manager-state, version-check and app-deletion
 * checks against a {@code TestZKRMStateStoreTester}.
 */
@Test (timeout = 60000)
public void testZKRMStateStoreRealZK() throws Exception {
  // A single @Test annotation carrying the timeout; the bare duplicate that
  // preceded it would not compile.
  TestZKRMStateStoreTester zkTester = new TestZKRMStateStoreTester();
  testRMAppStateStore(zkTester);
  testRMDTSecretManagerStateStore(zkTester);
  testCheckVersion(zkTester);
  testAppDeletion(zkTester);
}
private Configuration createHARMConf(

View File

@ -120,7 +120,7 @@ public void testZKClientRetry() throws Exception {
TestZKClient zkClientTester = new TestZKClient();
final String path = "/test";
YarnConfiguration conf = new YarnConfiguration();
conf.setInt(YarnConfiguration.ZK_RM_STATE_STORE_TIMEOUT_MS, 100);
conf.setInt(YarnConfiguration.ZK_RM_STATE_STORE_TIMEOUT_MS, 1000);
conf.setLong(YarnConfiguration.ZK_RM_STATE_STORE_RETRY_INTERVAL_MS, 100);
final ZKRMStateStore store =
(ZKRMStateStore) zkClientTester.getRMStateStore(conf);

View File

@ -651,5 +651,35 @@ public void run() {
}
assertFalse(failed.get());
}
@Test
public void testGetAppsInQueue() throws Exception {
  // Submit one application each to queues a1, a2 and b2.
  Application appInA1 = new Application("user_0", "a1", resourceManager);
  appInA1.submit();
  Application appInA2 = new Application("user_0", "a2", resourceManager);
  appInA2.submit();
  Application appInB2 = new Application("user_0", "b2", resourceManager);
  appInB2.submit();

  ResourceScheduler sched = resourceManager.getResourceScheduler();

  // Queue a1 holds exactly one attempt.
  List<ApplicationAttemptId> a1Attempts = sched.getAppsInQueue("a1");
  assertEquals(1, a1Attempts.size());

  // Queue a reports the attempts submitted to a1 and a2.
  List<ApplicationAttemptId> aAttempts = sched.getAppsInQueue("a");
  assertTrue(aAttempts.contains(appInA1.getApplicationAttemptId()));
  assertTrue(aAttempts.contains(appInA2.getApplicationAttemptId()));
  assertEquals(2, aAttempts.size());

  // The root queue reports all three attempts.
  List<ApplicationAttemptId> rootAttempts = sched.getAppsInQueue("root");
  assertTrue(rootAttempts.contains(appInA1.getApplicationAttemptId()));
  assertTrue(rootAttempts.contains(appInA2.getApplicationAttemptId()));
  assertTrue(rootAttempts.contains(appInB2.getApplicationAttemptId()));
  assertEquals(3, rootAttempts.size());

  // An unknown queue name yields null.
  Assert.assertNull(sched.getAppsInQueue("nonexistentqueue"));
}
}

View File

@ -79,6 +79,7 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestCapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
@ -2490,4 +2491,40 @@ public void testBlacklistNodes() throws Exception {
assertEquals("Incorrect number of containers allocated", 1, app
.getLiveContainers().size());
}
@Test
public void testGetAppsInQueue() throws Exception {
  scheduler.reinitialize(conf, resourceManager.getRMContext());

  ApplicationAttemptId subqueue1Attempt =
      createSchedulingRequest(1024, 1, "queue1.subqueue1", "user1");
  ApplicationAttemptId subqueue2Attempt =
      createSchedulingRequest(1024, 1, "queue1.subqueue2", "user1");
  ApplicationAttemptId defaultAttempt =
      createSchedulingRequest(1024, 1, "default", "user1");

  // Queue name given without the root prefix.
  List<ApplicationAttemptId> found =
      scheduler.getAppsInQueue("queue1.subqueue1");
  assertEquals(1, found.size());
  assertEquals(subqueue1Attempt, found.get(0));

  // The same queue given with the root prefix.
  found = scheduler.getAppsInQueue("root.queue1.subqueue1");
  assertEquals(1, found.size());
  assertEquals(subqueue1Attempt, found.get(0));

  // The request made against "default" by user1 is expected under "user1".
  found = scheduler.getAppsInQueue("user1");
  assertEquals(1, found.size());
  assertEquals(defaultAttempt, found.get(0));

  // Again, the root-prefixed name must give the same answer.
  found = scheduler.getAppsInQueue("root.user1");
  assertEquals(1, found.size());
  assertEquals(defaultAttempt, found.get(0));

  // apps in subqueues should be included
  found = scheduler.getAppsInQueue("queue1");
  Assert.assertEquals(2, found.size());
  Set<ApplicationAttemptId> queue1Attempts =
      Sets.newHashSet(found.get(0), found.get(1));
  assertTrue(queue1Attempts.contains(subqueue1Attempt));
  assertTrue(queue1Attempts.contains(subqueue2Attempt));
}
}

View File

@ -44,7 +44,7 @@ public void setUp() throws IOException {
Configuration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.RM_SCHEDULER, FairScheduler.class,
ResourceScheduler.class);
conf.set("mapred.fairscheduler.eventlog.enabled", "true");
conf.set("yarn.scheduler.fair.event-log-enabled", "true");
// All tests assume only one assignment per node update
conf.set(FairSchedulerConfiguration.ASSIGN_MULTIPLE, "false");

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.lang.reflect.Method;
@ -555,6 +556,24 @@ public void testBlackListNodes() throws Exception {
Assert.assertFalse(fs.getApplication(appAttemptId).isBlacklisted(host));
rm.stop();
}
@Test
public void testGetAppsInQueue() throws Exception {
  // Submit two applications for the same user.
  Application firstApp = new Application("user_0", resourceManager);
  firstApp.submit();
  Application secondApp = new Application("user_0", resourceManager);
  secondApp.submit();

  ResourceScheduler sched = resourceManager.getResourceScheduler();

  // Both attempts, and nothing else, are reported for the "default" queue.
  List<ApplicationAttemptId> defaultAttempts =
      sched.getAppsInQueue("default");
  assertTrue(defaultAttempts.contains(firstApp.getApplicationAttemptId()));
  assertTrue(defaultAttempts.contains(secondApp.getApplicationAttemptId()));
  assertEquals(2, defaultAttempts.size());

  // Any other queue name yields null.
  Assert.assertNull(sched.getAppsInQueue("someotherqueue"));
}
private void checkApplicationResourceUsage(int expected,
Application application) {

View File

@ -25,18 +25,21 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.http.HttpConfig;
import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.service.CompositeService;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.conf.HAUtil;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
@ -87,7 +90,7 @@ public class MiniYARNCluster extends CompositeService {
}
private NodeManager[] nodeManagers;
private ResourceManager resourceManager;
private ResourceManager[] resourceManagers;
private ResourceManagerWrapper resourceManagerWrapper;
@ -103,12 +106,14 @@ public class MiniYARNCluster extends CompositeService {
/**
* @param testName name of the test
* @param noOfNodeManagers the number of node managers in the cluster
* @param numResourceManagers the number of resource managers in the cluster
* @param numNodeManagers the number of node managers in the cluster
* @param numLocalDirs the number of nm-local-dirs per nodemanager
* @param numLogDirs the number of nm-log-dirs per nodemanager
*/
public MiniYARNCluster(String testName, int noOfNodeManagers,
int numLocalDirs, int numLogDirs) {
public MiniYARNCluster(
String testName, int numResourceManagers, int numNodeManagers,
int numLocalDirs, int numLogDirs) {
super(testName.replace("$", ""));
this.numLocalDirs = numLocalDirs;
this.numLogDirs = numLogDirs;
@ -157,28 +162,103 @@ public MiniYARNCluster(String testName, int noOfNodeManagers,
this.testWorkDir = targetWorkDir;
}
resourceManagerWrapper = new ResourceManagerWrapper();
addService(resourceManagerWrapper);
nodeManagers = new CustomNodeManager[noOfNodeManagers];
for(int index = 0; index < noOfNodeManagers; index++) {
resourceManagers = new ResourceManager[numResourceManagers];
for (int i = 0; i < numResourceManagers; i++) {
resourceManagers[i] = new ResourceManager();
addService(new ResourceManagerWrapper(i));
}
nodeManagers = new CustomNodeManager[numNodeManagers];
for(int index = 0; index < numNodeManagers; index++) {
addService(new NodeManagerWrapper(index));
nodeManagers[index] = new CustomNodeManager();
}
}
@Override
/**
* Creates a cluster with a single resource manager by delegating to the
* five-argument constructor with a resource manager count of 1.
*
* @param testName name of the test
* @param numNodeManagers the number of node managers in the cluster
* @param numLocalDirs the number of nm-local-dirs per nodemanager
* @param numLogDirs the number of nm-log-dirs per nodemanager
*/
public MiniYARNCluster(String testName, int numNodeManagers,
int numLocalDirs, int numLogDirs) {
this(testName, 1, numNodeManagers, numLocalDirs, numLogDirs);
}
/**
 * Initializes the cluster. When more than one resource manager was
 * requested, HA is switched on in {@code conf} and the RM ids
 * "rm0", "rm1", ... are recorded before delegating to the superclass.
 *
 * @param conf configuration to initialize with; modified in place when
 *             more than one resource manager is configured
 */
@Override
public void serviceInit(Configuration conf) throws Exception {
  if (resourceManagers.length > 1) {
    conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
    StringBuilder rmIds = new StringBuilder();
    for (int i = 0; i < resourceManagers.length; i++) {
      if (i != 0) {
        rmIds.append(",");
      }
      rmIds.append("rm" + i);
    }
    conf.set(YarnConfiguration.RM_HA_IDS, rmIds.toString());
  }
  // Delegate exactly once, and only after the HA settings are in place.
  super.serviceInit(
      conf instanceof YarnConfiguration ? conf : new YarnConfiguration(conf));
}
/**
* @return the value of the {@code testWorkDir} field
*/
public File getTestWorkDir() {
return testWorkDir;
}
/**
 * In an HA cluster, go through all the RMs and find the Active RM. If none
 * of them are active, sleep for one second and check again, for at most
 * 5 rounds, to give them time to transition to Active.
 *
 * In a non-HA cluster, return the index of the only RM.
 *
 * @return index of the active RM, or -1 if no RM was found active
 * @throws YarnRuntimeException if the status of an RM cannot be read, or if
 *         the calling thread is interrupted while waiting
 */
@InterfaceAudience.Private
@VisibleForTesting
int getActiveRMIndex() {
  if (resourceManagers.length == 1) {
    return 0;
  }

  int numRetriesForRMBecomingActive = 5;
  while (numRetriesForRMBecomingActive-- > 0) {
    for (int i = 0; i < resourceManagers.length; i++) {
      try {
        if (HAServiceProtocol.HAServiceState.ACTIVE ==
            resourceManagers[i].getRMContext().getRMAdminService()
                .getServiceStatus().getState()) {
          return i;
        }
      } catch (IOException e) {
        throw new YarnRuntimeException("Couldn't read the status of " +
            "a ResourceManger in the HA ensemble.", e);
      }
    }
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
      // Restore the interrupt flag for callers and keep the original
      // exception as the cause instead of dropping it.
      Thread.currentThread().interrupt();
      throw new YarnRuntimeException("Interrupted while waiting for one " +
          "of the ResourceManagers to become active", e);
    }
  }
  return -1;
}
/**
 * @return the active {@link ResourceManager} of the cluster,
 * null if none of them are active.
 */
public ResourceManager getResourceManager() {
  // Look the index up once and reuse it. A second lookup may wait again and
  // may return a different answer (including -1) if RM states change in
  // between, which would index the array out of bounds.
  int activeRMIndex = getActiveRMIndex();
  return activeRMIndex == -1
      ? null
      : this.resourceManagers[activeRMIndex];
}
/**
* @param i index into the {@code resourceManagers} array
* @return the resource manager stored at index {@code i}, whether or not
* it is currently active
*/
public ResourceManager getResourceManager(int i) {
return this.resourceManagers[i];
}
public NodeManager getNodeManager(int i) {
@ -195,8 +275,29 @@ public static String getHostname() {
}
private class ResourceManagerWrapper extends AbstractService {
public ResourceManagerWrapper() {
super(ResourceManagerWrapper.class.getName());
private int index;
public ResourceManagerWrapper(int i) {
super(ResourceManagerWrapper.class.getName() + "_" + i);
index = i;
}
/**
 * Sets the RM, admin, scheduler and resource-tracker addresses to this
 * host with port 0, and the RM web app to the same host and port 0.
 */
private void setNonHARMConfiguration(Configuration conf) {
  final String host = MiniYARNCluster.getHostname();
  final String hostWithPortZero = host + ":0";
  final String[] addressKeys = {
      YarnConfiguration.RM_ADDRESS,
      YarnConfiguration.RM_ADMIN_ADDRESS,
      YarnConfiguration.RM_SCHEDULER_ADDRESS,
      YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS
  };
  for (String addressKey : addressKeys) {
    conf.set(addressKey, hostWithPortZero);
  }
  WebAppUtils.setRMWebAppHostnameAndPort(conf, host, 0);
}
/**
 * Sets this wrapper's RM id ("rm" followed by its index) as the HA id and
 * points every key in RM_RPC_ADDRESS_CONF_KEYS, suffixed with that id, at
 * this host with port 0.
 */
private void setHARMConfiguration(Configuration conf) {
  final String haId = "rm" + index;
  final String hostWithPortZero = MiniYARNCluster.getHostname() + ":0";
  conf.set(YarnConfiguration.RM_HA_ID, haId);
  for (String baseKey : YarnConfiguration.RM_RPC_ADDRESS_CONF_KEYS) {
    String suffixedKey = HAUtil.addSuffix(baseKey, haId);
    conf.set(suffixedKey, hostWithPortZero);
  }
}
@Override
@ -206,22 +307,15 @@ protected synchronized void serviceInit(Configuration conf)
if (!conf.getBoolean(
YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS,
YarnConfiguration.DEFAULT_YARN_MINICLUSTER_FIXED_PORTS)) {
// pick free random ports.
String hostname = MiniYARNCluster.getHostname();
conf.set(YarnConfiguration.RM_ADDRESS, hostname + ":0");
conf.set(YarnConfiguration.RM_ADMIN_ADDRESS, hostname + ":0");
conf.set(YarnConfiguration.RM_SCHEDULER_ADDRESS, hostname + ":0");
conf.set(YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS, hostname + ":0");
WebAppUtils.setRMWebAppHostnameAndPort(conf, hostname, 0);
if (HAUtil.isHAEnabled(conf)) {
setHARMConfiguration(conf);
} else {
setNonHARMConfiguration(conf);
}
}
resourceManager = new ResourceManager() {
@Override
protected void doSecureLogin() throws IOException {
// Don't try to login using keytab in the testcase.
};
};
resourceManager.init(conf);
resourceManager.getRMContext().getDispatcher().register(RMAppAttemptEventType.class,
resourceManagers[index].init(conf);
resourceManagers[index].getRMContext().getDispatcher().register
(RMAppAttemptEventType.class,
new EventHandler<RMAppAttemptEvent>() {
public void handle(RMAppAttemptEvent event) {
if (event instanceof RMAppAttemptRegistrationEvent) {
@ -239,20 +333,20 @@ protected synchronized void serviceStart() throws Exception {
try {
new Thread() {
public void run() {
resourceManager.start();
};
resourceManagers[index].start();
}
}.start();
int waitCount = 0;
while (resourceManager.getServiceState() == STATE.INITED
while (resourceManagers[index].getServiceState() == STATE.INITED
&& waitCount++ < 60) {
LOG.info("Waiting for RM to start...");
Thread.sleep(1500);
}
if (resourceManager.getServiceState() != STATE.STARTED) {
if (resourceManagers[index].getServiceState() != STATE.STARTED) {
// RM could have failed.
throw new IOException(
"ResourceManager failed to start. Final state is "
+ resourceManager.getServiceState());
+ resourceManagers[index].getServiceState());
}
super.serviceStart();
} catch (Throwable t) {
@ -278,9 +372,9 @@ private void waitForAppMastersToFinish(long timeoutMillis) throws InterruptedExc
@Override
protected synchronized void serviceStop() throws Exception {
if (resourceManager != null) {
if (resourceManagers[index] != null) {
waitForAppMastersToFinish(5000);
resourceManager.stop();
resourceManagers[index].stop();
}
super.serviceStop();
@ -372,7 +466,7 @@ protected synchronized void serviceStart() throws Exception {
new Thread() {
public void run() {
nodeManagers[index].start();
};
}
}.start();
int waitCount = 0;
while (nodeManagers[index].getServiceState() == STATE.INITED
@ -398,12 +492,12 @@ protected synchronized void serviceStop() throws Exception {
super.serviceStop();
}
}
private class CustomNodeManager extends NodeManager {
@Override
protected void doSecureLogin() throws IOException {
// Don't try to login using keytab in the testcase.
};
}
@Override
protected NodeStatusUpdater createNodeStatusUpdater(Context context,
@ -412,8 +506,8 @@ protected NodeStatusUpdater createNodeStatusUpdater(Context context,
healthChecker, metrics) {
@Override
protected ResourceTracker getRMClient() {
final ResourceTrackerService rt = resourceManager
.getResourceTrackerService();
final ResourceTrackerService rt =
getResourceManager().getResourceTrackerService();
final RecordFactory recordFactory =
RecordFactoryProvider.getRecordFactory(null);
@ -424,8 +518,7 @@ protected ResourceTracker getRMClient() {
public NodeHeartbeatResponse nodeHeartbeat(
NodeHeartbeatRequest request) throws YarnException,
IOException {
NodeHeartbeatResponse response = recordFactory.newRecordInstance(
NodeHeartbeatResponse.class);
NodeHeartbeatResponse response;
try {
response = rt.nodeHeartbeat(request);
} catch (YarnException e) {
@ -440,8 +533,7 @@ public NodeHeartbeatResponse nodeHeartbeat(
public RegisterNodeManagerResponse registerNodeManager(
RegisterNodeManagerRequest request)
throws YarnException, IOException {
RegisterNodeManagerResponse response = recordFactory.
newRecordInstance(RegisterNodeManagerResponse.class);
RegisterNodeManagerResponse response;
try {
response = rt.registerNodeManager(request);
} catch (YarnException e) {
@ -452,13 +544,11 @@ public RegisterNodeManagerResponse registerNodeManager(
return response;
}
};
};
}
@Override
protected void stopRMProxy() {
return;
}
protected void stopRMProxy() { }
};
};
}
}
}

View File

@ -0,0 +1,71 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.resourcemanager.AdminService;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.fail;
/**
 * Checks that a {@link MiniYARNCluster} created with two resource managers
 * and one node manager comes up and that the node manager registers.
 */
public class TestMiniYARNClusterForHA {
  MiniYARNCluster cluster;

  /**
   * Starts a cluster with 2 RMs, 1 NM, 1 local dir and 1 log dir, then
   * explicitly transitions the RM at index 0 to active.
   */
  @Before
  public void setup() throws IOException, InterruptedException {
    Configuration conf = new YarnConfiguration();
    cluster = new MiniYARNCluster(TestMiniYARNClusterForHA.class.getName(),
        2, 1, 1, 1);
    cluster.init(conf);
    cluster.start();

    cluster.getResourceManager(0).getRMContext().getRMAdminService()
        .transitionToActive(new HAServiceProtocol.StateChangeRequestInfo(
            HAServiceProtocol.RequestSource.REQUEST_BY_USER));

    assertFalse("RM never turned active", -1 == cluster.getActiveRMIndex());
  }

  /**
   * Stops the cluster so its services do not outlive the test. The null
   * check covers the case where setup failed before the cluster was created.
   */
  @After
  public void teardown() {
    if (cluster != null) {
      cluster.stop();
      cluster = null;
    }
  }

  /**
   * Polls the RM at index 0 every 100 ms, up to 600 times, until its cluster
   * metrics report exactly one node manager.
   */
  @Test
  public void testClusterWorks() throws YarnException, InterruptedException {
    ResourceManager rm = cluster.getResourceManager(0);
    GetClusterMetricsRequest req = GetClusterMetricsRequest.newInstance();

    for (int i = 0; i < 600; i++) {
      if (1 == rm.getClientRMService().getClusterMetrics(req)
          .getClusterMetrics().getNumNodeManagers()) {
        return;
      }
      Thread.sleep(100);
    }
    fail("NodeManager never registered with the RM");
  }
}