From bbff44cb03d0150f990acc3b77170893241cc282 Mon Sep 17 00:00:00 2001 From: Alejandro Abdelnur Date: Tue, 9 Sep 2014 22:16:42 -0700 Subject: [PATCH 01/13] HDFS-6776. Using distcp to copy data between insecure and secure cluster via webdhfs doesn't work. (yzhangal via tucu) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../DelegationTokenSecretManager.java | 3 +- .../web/resources/NamenodeWebHdfsMethods.java | 3 ++ .../hadoop/hdfs/web/WebHdfsFileSystem.java | 18 +++++++- .../apache/hadoop/hdfs/web/TestWebHDFS.java | 41 +++++++++++++++++++ 5 files changed, 65 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0b914ac43e..fa00d448d8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -739,6 +739,9 @@ Release 2.6.0 - UNRELEASED HDFS-6986. DistributedFileSystem must get delegation tokens from configured KeyProvider. (zhz via tucu) + HDFS-6776. Using distcp to copy data between insecure and secure cluster via webdhfs + doesn't work. (yzhangal via tucu) + Release 2.5.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java index 175e3eddf1..8af7ebaa0b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java @@ -402,8 +402,7 @@ public static Credentials createCredentials(final NameNode namenode, final Token token = namenode.getRpcServer( ).getDelegationToken(new Text(renewer)); if (token == null) { - throw new IOException("Failed to get the token for " + renewer - + ", user=" + ugi.getShortUserName()); + return null; } final InetSocketAddress addr = namenode.getNameNodeAddress(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java index 991885b2e4..3949fbdc53 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java @@ -283,6 +283,9 @@ private Token generateDelegationToken( final String renewer) throws IOException { final Credentials c = DelegationTokenSecretManager.createCredentials( namenode, ugi, renewer != null? renewer: ugi.getShortUserName()); + if (c == null) { + return null; + } final Token t = c.getAllTokens().iterator().next(); Text kind = request.getScheme().equals("http") ? 
WebHdfsFileSystem.TOKEN_KIND : SWebHdfsFileSystem.TOKEN_KIND; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index cf6233f5a3..40312ec866 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -41,6 +41,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.DelegationTokenRenewer; import org.apache.hadoop.fs.FSDataInputStream; @@ -102,6 +103,11 @@ public class WebHdfsFileSystem extends FileSystem /** Delegation token kind */ public static final Text TOKEN_KIND = new Text("WEBHDFS delegation"); + + @VisibleForTesting + public static final String CANT_FALLBACK_TO_INSECURE_MSG = + "The client is configured to only allow connecting to secure cluster"; + private boolean canRefreshDelegationToken; private UserGroupInformation ugi; @@ -112,6 +118,7 @@ public class WebHdfsFileSystem extends FileSystem private Path workingDir; private InetSocketAddress nnAddrs[]; private int currentNNAddrIndex; + private boolean disallowFallbackToInsecureCluster; /** * Return the protocol scheme for the FileSystem. @@ -194,6 +201,9 @@ public synchronized void initialize(URI uri, Configuration conf this.workingDir = getHomeDirectory(); this.canRefreshDelegationToken = UserGroupInformation.isSecurityEnabled(); + this.disallowFallbackToInsecureCluster = !conf.getBoolean( + CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY, + CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_DEFAULT); this.delegationToken = null; } @@ -1293,7 +1303,13 @@ Token decodeResponse(Map json) return JsonUtil.toDelegationToken(json); } }.run(); - token.setService(tokenServiceName); + if (token != null) { + token.setService(tokenServiceName); + } else { + if (disallowFallbackToInsecureCluster) { + throw new AccessControlException(CANT_FALLBACK_TO_INSECURE_MSG); + } + } return token; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java index 14312110aa..eec49d848b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java @@ -29,6 +29,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -45,6 +46,7 @@ import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.ipc.RetriableException; +import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; @@ -482,4 +484,43 @@ public void 
testRaceWhileNNStartup() throws Exception { } } } + + @Test + public void testDTInInsecureClusterWithFallback() + throws IOException, URISyntaxException { + MiniDFSCluster cluster = null; + final Configuration conf = WebHdfsTestUtil.createConf(); + conf.setBoolean(CommonConfigurationKeys + .IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY, true); + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build(); + final FileSystem webHdfs = WebHdfsTestUtil.getWebHdfsFileSystem(conf, + WebHdfsFileSystem.SCHEME); + Assert.assertNull(webHdfs.getDelegationToken(null)); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } + + @Test + public void testDTInInsecureCluster() throws Exception { + MiniDFSCluster cluster = null; + final Configuration conf = WebHdfsTestUtil.createConf(); + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build(); + final FileSystem webHdfs = WebHdfsTestUtil.getWebHdfsFileSystem(conf, + WebHdfsFileSystem.SCHEME); + webHdfs.getDelegationToken(null); + fail("No exception is thrown."); + } catch (AccessControlException ace) { + Assert.assertTrue(ace.getMessage().startsWith( + WebHdfsFileSystem.CANT_FALLBACK_TO_INSECURE_MSG)); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } } From 9ee891aa90333bf18cba412400daa5834f15c41d Mon Sep 17 00:00:00 2001 From: Alejandro Abdelnur Date: Tue, 9 Sep 2014 22:18:03 -0700 Subject: [PATCH 02/13] HADOOP-11077. NPE if hosts not specified in ProxyUsers. (gchanan via tucu) --- hadoop-common-project/hadoop-common/CHANGES.txt | 2 ++ .../authorize/DefaultImpersonationProvider.java | 2 +- .../hadoop/security/authorize/TestProxyUsers.java | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index c60a9b701e..b0150873cd 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -777,6 +777,8 @@ Release 2.6.0 - UNRELEASED HADOOP-10925. Compilation fails in native link0 function on Windows. (cnauroth) + HADOOP-11077. NPE if hosts not specified in ProxyUsers. 
(gchanan via tucu) + Release 2.5.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/DefaultImpersonationProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/DefaultImpersonationProvider.java index ab1c390f46..b36ac80717 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/DefaultImpersonationProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/DefaultImpersonationProvider.java @@ -123,7 +123,7 @@ public void authorize(UserGroupInformation user, MachineList MachineList = proxyHosts.get( getProxySuperuserIpConfKey(realUser.getShortUserName())); - if(!MachineList.includes(remoteAddress)) { + if(MachineList == null || !MachineList.includes(remoteAddress)) { throw new AuthorizationException("Unauthorized connection for super-user: " + realUser.getUserName() + " from IP " + remoteAddress); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyUsers.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyUsers.java index dbcac676fa..8ff4bfb108 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyUsers.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/authorize/TestProxyUsers.java @@ -478,6 +478,21 @@ public void testProxyUsersWithCustomPrefix() throws Exception { assertNotAuthorized(proxyUserUgi, "1.2.3.5"); } + @Test + public void testNoHostsForUsers() throws Exception { + Configuration conf = new Configuration(false); + conf.set("y." + REAL_USER_NAME + ".users", + StringUtils.join(",", Arrays.asList(AUTHORIZED_PROXY_USER_NAME))); + ProxyUsers.refreshSuperUserGroupsConfiguration(conf, "y"); + + UserGroupInformation realUserUgi = UserGroupInformation + .createRemoteUser(REAL_USER_NAME); + UserGroupInformation proxyUserUgi = UserGroupInformation.createProxyUserForTesting( + AUTHORIZED_PROXY_USER_NAME, realUserUgi, GROUP_NAMES); + + // IP doesn't matter + assertNotAuthorized(proxyUserUgi, "1.2.3.4"); + } private void assertNotAuthorized(UserGroupInformation proxyUgi, String host) { try { From b100949404843ed245ef4e118291f55b3fdc81b8 Mon Sep 17 00:00:00 2001 From: Alejandro Abdelnur Date: Tue, 9 Sep 2014 22:19:42 -0700 Subject: [PATCH 03/13] HADOOP-9989. Bug introduced in HADOOP-9374, which parses the -tokenCacheFile as binary file but set it to the configuration as JSON file. (zxu via tucu) --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../main/java/org/apache/hadoop/util/GenericOptionsParser.java | 2 +- .../java/org/apache/hadoop/util/TestGenericOptionsParser.java | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index b0150873cd..b2157d6f9e 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -779,6 +779,9 @@ Release 2.6.0 - UNRELEASED HADOOP-11077. NPE if hosts not specified in ProxyUsers. (gchanan via tucu) + HADOOP-9989. Bug introduced in HADOOP-9374, which parses the -tokenCacheFile + as binary file but set it to the configuration as JSON file. 
(zxu via tucu) + Release 2.5.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java index 18acbf109a..2a37dac460 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java @@ -332,7 +332,7 @@ private void processGeneralOptions(Configuration conf, } UserGroupInformation.getCurrentUser().addCredentials( Credentials.readTokenStorageFile(p, conf)); - conf.set("mapreduce.job.credentials.json", p.toString(), + conf.set("mapreduce.job.credentials.binary", p.toString(), "from -tokenCacheFile command line option"); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericOptionsParser.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericOptionsParser.java index 779318acc8..2bc19154f4 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericOptionsParser.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestGenericOptionsParser.java @@ -249,7 +249,7 @@ public void testTokenCacheOption() throws IOException { creds.writeTokenStorageFile(tmpPath, conf); new GenericOptionsParser(conf, args); - String fileName = conf.get("mapreduce.job.credentials.json"); + String fileName = conf.get("mapreduce.job.credentials.binary"); assertNotNull("files is null", fileName); assertEquals("files option does not match", tmpPath.toString(), fileName); From 3072c83b38fd87318d502a7d1bc518963b5ccdf7 Mon Sep 17 00:00:00 2001 From: Karthik Kambatla Date: Wed, 10 Sep 2014 08:26:14 -0700 Subject: [PATCH 04/13] YARN-1458. FairScheduler: Zero weight can lead to livelock. (Zhihai Xu via kasha) --- hadoop-yarn-project/CHANGES.txt | 3 + .../fair/policies/ComputeFairShares.java | 87 +++++++++-- .../scheduler/fair/TestFairScheduler.java | 145 +++++++++++++++++- 3 files changed, 218 insertions(+), 17 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 7eaf1c805d..5734b98702 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -305,6 +305,9 @@ Release 2.6.0 - UNRELEASED YARN-2526. SLS can deadlock when all the threads are taken by AMSimulators. (Wei Yan via kasha) + YARN-1458. FairScheduler: Zero weight can lead to livelock. 
+ (Zhihai Xu via kasha) + Release 2.5.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java index 6836758019..12ddde2ed3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java @@ -48,16 +48,7 @@ public class ComputeFairShares { public static void computeShares( Collection schedulables, Resource totalResources, ResourceType type) { - Collection activeSchedulables = new ArrayList(); - for (Schedulable sched : schedulables) { - if ((sched instanceof FSQueue) && !((FSQueue) sched).isActive()) { - setResourceValue(0, sched.getFairShare(), type); - } else { - activeSchedulables.add(sched); - } - } - - computeSharesInternal(activeSchedulables, totalResources, type, false); + computeSharesInternal(schedulables, totalResources, type, false); } /** @@ -117,8 +108,13 @@ public static void computeSteadyShares( * iterations of binary search is a constant (dependent on desired precision). */ private static void computeSharesInternal( - Collection schedulables, Resource totalResources, - ResourceType type, boolean isSteadyShare) { + Collection allSchedulables, + Resource totalResources, ResourceType type, boolean isSteadyShare) { + + Collection schedulables = new ArrayList(); + int takenResources = handleFixedFairShares( + allSchedulables, schedulables, isSteadyShare, type); + if (schedulables.isEmpty()) { return; } @@ -135,9 +131,11 @@ private static void computeSharesInternal( totalMaxShare += maxShare; } } - int totalResource = Math.min(totalMaxShare, - getResourceValue(totalResources, type)); - + + int totalResource = Math.max((getResourceValue(totalResources, type) - + takenResources), 0); + totalResource = Math.min(totalMaxShare, totalResource); + double rMax = 1.0; while (resourceUsedWithWeightToResourceRatio(rMax, schedulables, type) < totalResource) { @@ -196,7 +194,64 @@ private static int computeShare(Schedulable sched, double w2rRatio, share = Math.min(share, getResourceValue(sched.getMaxShare(), type)); return (int) share; } - + + /** + * Helper method to handle Schedulabes with fixed fairshares. + * Returns the resources taken by fixed fairshare schedulables, + * and adds the remaining to the passed nonFixedSchedulables. + */ + private static int handleFixedFairShares( + Collection schedulables, + Collection nonFixedSchedulables, + boolean isSteadyShare, ResourceType type) { + int totalResource = 0; + + for (Schedulable sched : schedulables) { + int fixedShare = getFairShareIfFixed(sched, isSteadyShare, type); + if (fixedShare < 0) { + nonFixedSchedulables.add(sched); + } else { + setResourceValue(fixedShare, + isSteadyShare + ? 
((FSQueue)sched).getSteadyFairShare() + : sched.getFairShare(), + type); + totalResource = (int) Math.min((long)totalResource + (long)fixedShare, + Integer.MAX_VALUE); + } + } + return totalResource; + } + + /** + * Get the fairshare for the {@link Schedulable} if it is fixed, -1 otherwise. + * + * The fairshare is fixed if either the maxShare is 0, weight is 0, + * or the Schedulable is not active for instantaneous fairshare. + */ + private static int getFairShareIfFixed(Schedulable sched, + boolean isSteadyShare, ResourceType type) { + + // Check if maxShare is 0 + if (getResourceValue(sched.getMaxShare(), type) <= 0) { + return 0; + } + + // For instantaneous fairshares, check if queue is active + if (!isSteadyShare && + (sched instanceof FSQueue) && !((FSQueue)sched).isActive()) { + return 0; + } + + // Check if weight is 0 + if (sched.getWeights().getWeight(type) <= 0) { + int minShare = getResourceValue(sched.getMinShare(), type); + return (minShare <= 0) ? 0 : minShare; + } + + return -1; + } + private static int getResourceValue(Resource resource, ResourceType type) { switch (type) { case MEMORY: diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 05b1925575..3a0dac3b42 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -307,7 +307,150 @@ public void testSimpleFairShareCalculation() throws IOException { assertEquals(3414, p.getMetrics().getSteadyFairShareMB()); } } - + + @Test + public void testFairShareWithZeroWeight() throws IOException { + conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); + // set queueA and queueB weight zero. + PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); + out.println(""); + out.println(""); + out.println(""); + out.println("0.0"); + out.println(""); + out.println(""); + out.println("0.0"); + out.println(""); + out.println(""); + out.close(); + + scheduler.init(conf); + scheduler.start(); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + // Add one big node (only care about aggregate capacity) + RMNode node1 = + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, + "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + + // Queue A wants 2 * 1024. + createSchedulingRequest(2 * 1024, "queueA", "user1"); + // Queue B wants 6 * 1024 + createSchedulingRequest(6 * 1024, "queueB", "user1"); + + scheduler.update(); + + FSLeafQueue queue = scheduler.getQueueManager().getLeafQueue( + "queueA", false); + // queueA's weight is 0.0, so its fair share should be 0. + assertEquals(0, queue.getFairShare().getMemory()); + // queueB's weight is 0.0, so its fair share should be 0. 
+ queue = scheduler.getQueueManager().getLeafQueue( + "queueB", false); + assertEquals(0, queue.getFairShare().getMemory()); + } + + @Test + public void testFairShareWithZeroWeightNoneZeroMinRes() throws IOException { + conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); + // set queueA and queueB weight zero. + // set queueA and queueB minResources 1. + PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); + out.println(""); + out.println(""); + out.println(""); + out.println("1 mb 1 vcores"); + out.println("0.0"); + out.println(""); + out.println(""); + out.println("1 mb 1 vcores"); + out.println("0.0"); + out.println(""); + out.println(""); + out.close(); + + scheduler.init(conf); + scheduler.start(); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + // Add one big node (only care about aggregate capacity) + RMNode node1 = + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, + "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + + // Queue A wants 2 * 1024. + createSchedulingRequest(2 * 1024, "queueA", "user1"); + // Queue B wants 6 * 1024 + createSchedulingRequest(6 * 1024, "queueB", "user1"); + + scheduler.update(); + + FSLeafQueue queue = scheduler.getQueueManager().getLeafQueue( + "queueA", false); + // queueA's weight is 0.0 and minResources is 1, + // so its fair share should be 1 (minShare). + assertEquals(1, queue.getFairShare().getMemory()); + // queueB's weight is 0.0 and minResources is 1, + // so its fair share should be 1 (minShare). + queue = scheduler.getQueueManager().getLeafQueue( + "queueB", false); + assertEquals(1, queue.getFairShare().getMemory()); + } + + @Test + public void testFairShareWithNoneZeroWeightNoneZeroMinRes() + throws IOException { + conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); + // set queueA and queueB weight 0.5. + // set queueA and queueB minResources 1024. + PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); + out.println(""); + out.println(""); + out.println(""); + out.println("1024 mb 1 vcores"); + out.println("0.5"); + out.println(""); + out.println(""); + out.println("1024 mb 1 vcores"); + out.println("0.5"); + out.println(""); + out.println(""); + out.close(); + + scheduler.init(conf); + scheduler.start(); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + // Add one big node (only care about aggregate capacity) + RMNode node1 = + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, + "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + + // Queue A wants 4 * 1024. + createSchedulingRequest(4 * 1024, "queueA", "user1"); + // Queue B wants 4 * 1024 + createSchedulingRequest(4 * 1024, "queueB", "user1"); + + scheduler.update(); + + FSLeafQueue queue = scheduler.getQueueManager().getLeafQueue( + "queueA", false); + // queueA's weight is 0.5 and minResources is 1024, + // so its fair share should be 4096. + assertEquals(4096, queue.getFairShare().getMemory()); + // queueB's weight is 0.5 and minResources is 1024, + // so its fair share should be 4096. 
+ queue = scheduler.getQueueManager().getLeafQueue( + "queueB", false); + assertEquals(4096, queue.getFairShare().getMemory()); + } + @Test public void testSimpleHierarchicalFairShareCalculation() throws IOException { scheduler.init(conf); From b67d5ba7842cc10695d987f217027848a5a8c3d8 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Mon, 8 Sep 2014 14:48:21 -0700 Subject: [PATCH 05/13] YARN-2448. Changed ApplicationMasterProtocol to expose RM-recognized resource types to the AMs. Contributed by Varun Vasudev. --- hadoop-yarn-project/CHANGES.txt | 3 + .../RegisterApplicationMasterResponse.java | 23 ++++++ .../src/main/proto/yarn_service_protos.proto | 6 ++ ...gisterApplicationMasterResponsePBImpl.java | 78 +++++++++++++++++-- .../ApplicationMasterService.java | 10 +-- .../scheduler/AbstractYarnScheduler.java | 15 ++-- .../scheduler/YarnScheduler.java | 10 +++ .../scheduler/capacity/CapacityScheduler.java | 22 ++++-- .../scheduler/fair/FairScheduler.java | 16 ++-- .../TestApplicationMasterService.java | 53 ++++++++++++- 10 files changed, 202 insertions(+), 34 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 5734b98702..a840e4f442 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -196,6 +196,9 @@ Release 2.6.0 - UNRELEASED YARN-2515. Updated ConverterUtils#toContainerId to parse epoch. (Tsuyoshi OZAWA via jianhe) + YARN-2448. Changed ApplicationMasterProtocol to expose RM-recognized resource + types to the AMs. (Varun Vasudev via vinodkv) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/RegisterApplicationMasterResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/RegisterApplicationMasterResponse.java index 79f9f3a442..33daf28123 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/RegisterApplicationMasterResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/RegisterApplicationMasterResponse.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.api.protocolrecords; import java.nio.ByteBuffer; +import java.util.EnumSet; import java.util.List; import java.util.Map; @@ -31,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.NMToken; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; import org.apache.hadoop.yarn.util.Records; /** @@ -180,4 +182,25 @@ public abstract void setContainersFromPreviousAttempts( @Private @Unstable public abstract void setNMTokensFromPreviousAttempts(List nmTokens); + + /** + * Get a set of the resource types considered by the scheduler. + * + * @return a Map of RM settings + */ + @Public + @Unstable + public abstract EnumSet getSchedulerResourceTypes(); + + /** + * Set the resource types used by the scheduler. 
+ * + * @param types + * a set of the resource types that the scheduler considers during + * scheduling + */ + @Private + @Unstable + public abstract void setSchedulerResourceTypes( + EnumSet types); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index df8784b144..4203744a5d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -47,6 +47,7 @@ message RegisterApplicationMasterResponseProto { repeated ContainerProto containers_from_previous_attempts = 4; optional string queue = 5; repeated NMTokenProto nm_tokens_from_previous_attempts = 6; + repeated SchedulerResourceTypes scheduler_resource_types = 7; } message FinishApplicationMasterRequestProto { @@ -88,6 +89,11 @@ message AllocateResponseProto { optional hadoop.common.TokenProto am_rm_token = 12; } +enum SchedulerResourceTypes { + MEMORY = 0; + CPU = 1; +} + ////////////////////////////////////////////////////// /////// client_RM_Protocol /////////////////////////// ////////////////////////////////////////////////////// diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/RegisterApplicationMasterResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/RegisterApplicationMasterResponsePBImpl.java index 06a637a3e6..32dc85d6f3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/RegisterApplicationMasterResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/RegisterApplicationMasterResponsePBImpl.java @@ -20,11 +20,7 @@ import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; +import java.util.*; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; @@ -43,6 +39,7 @@ import org.apache.hadoop.yarn.proto.YarnServiceProtos.NMTokenProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.RegisterApplicationMasterResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.RegisterApplicationMasterResponseProtoOrBuilder; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; import com.google.protobuf.ByteString; import com.google.protobuf.TextFormat; @@ -61,6 +58,7 @@ public class RegisterApplicationMasterResponsePBImpl extends private Map applicationACLS = null; private List containersFromPreviousAttempts = null; private List nmTokens = null; + private EnumSet schedulerResourceTypes = null; public RegisterApplicationMasterResponsePBImpl() { builder = RegisterApplicationMasterResponseProto.newBuilder(); @@ -122,6 +120,9 @@ private void mergeLocalToBuilder() { Iterable iterable = getTokenProtoIterable(nmTokens); builder.addAllNmTokensFromPreviousAttempts(iterable); } + if(schedulerResourceTypes != null) { + addSchedulerResourceTypes(); + } } @@ -364,6 +365,73 @@ public void remove() { }; } + @Override + public EnumSet getSchedulerResourceTypes() { + initSchedulerResourceTypes(); + return this.schedulerResourceTypes; + } + + 
private void initSchedulerResourceTypes() { + if (this.schedulerResourceTypes != null) { + return; + } + RegisterApplicationMasterResponseProtoOrBuilder p = + viaProto ? proto : builder; + + List list = p.getSchedulerResourceTypesList(); + if (list.isEmpty()) { + this.schedulerResourceTypes = + EnumSet.noneOf(SchedulerResourceTypes.class); + } else { + this.schedulerResourceTypes = EnumSet.copyOf(list); + } + } + + private void addSchedulerResourceTypes() { + maybeInitBuilder(); + builder.clearSchedulerResourceTypes(); + if (schedulerResourceTypes == null) { + return; + } + Iterable values = + new Iterable() { + + @Override + public Iterator iterator() { + return new Iterator() { + Iterator settingsIterator = + schedulerResourceTypes.iterator(); + + @Override + public boolean hasNext() { + return settingsIterator.hasNext(); + } + + @Override + public SchedulerResourceTypes next() { + return settingsIterator.next(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + this.builder.addAllSchedulerResourceTypes(values); + } + + @Override + public void setSchedulerResourceTypes(EnumSet types) { + if (types == null) { + return; + } + initSchedulerResourceTypes(); + this.schedulerResourceTypes.clear(); + this.schedulerResourceTypes.addAll(types); + } + private Resource convertFromProtoFormat(ResourceProto resource) { return new ResourcePBImpl(resource); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index d77180cd6c..e6d878abd3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -22,11 +22,7 @@ import java.io.InputStream; import java.net.InetSocketAddress; import java.net.UnknownHostException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; @@ -329,6 +325,10 @@ public RegisterApplicationMasterResponse registerApplicationMaster( + transferredContainers.size() + " containers from previous" + " attempts and " + nmTokens.size() + " NM tokens."); } + + response.setSchedulerResourceTypes(rScheduler + .getSchedulingResourceTypes()); + return response; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index ee5dcbe7ec..0b5447b017 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -19,13 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.Timer; -import java.util.TimerTask; +import java.util.*; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.logging.Log; @@ -45,6 +39,7 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; @@ -502,4 +497,10 @@ public synchronized void updateNodeResource(RMNode nm, + " with the same resource: " + newResource); } } + + /** {@inheritDoc} */ + @Override + public EnumSet getSchedulingResourceTypes() { + return EnumSet.of(SchedulerResourceTypes.MEMORY); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java index 5ce16c2b88..b6c1018c93 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; import java.io.IOException; +import java.util.EnumSet; import java.util.List; import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate; @@ -41,6 +42,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; /** * This interface is used by the components to talk to the @@ -220,4 +222,12 @@ public String moveApplication(ApplicationId appId, String newQueue) * @throws YarnException */ void killAllAppsInQueue(String queueName) throws YarnException; + + /** + * Return a collection of the resource types that are considered when + * scheduling + * + * @return an EnumSet containing the resource types + */ + public EnumSet getSchedulingResourceTypes(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index a8ef94224b..6b810d7d8f 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -20,13 +20,7 @@ import java.io.IOException; import java.io.InputStream; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Random; +import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -50,12 +44,12 @@ import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; -import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceOption; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.*; @@ -89,6 +83,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.utils.Lock; +import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; @@ -1285,4 +1280,15 @@ private LeafQueue getAndCheckLeafQueue(String queue) throws YarnException { } return (LeafQueue) ret; } + + /** {@inheritDoc} */ + @Override + public EnumSet getSchedulingResourceTypes() { + if (calculator.getClass().getName() + .equals(DefaultResourceCalculator.class.getName())) { + return EnumSet.of(SchedulerResourceTypes.MEMORY); + } + return EnumSet + .of(SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.CPU); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index a35e49f282..9c40d48f06 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -19,13 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashSet; -import 
java.util.Iterator; -import java.util.List; -import java.util.Set; +import java.util.*; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.logging.Log; @@ -50,6 +44,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; @@ -1529,4 +1524,11 @@ public synchronized void updateNodeResource(RMNode nm, queueMgr.getRootQueue().setSteadyFairShare(clusterResource); queueMgr.getRootQueue().recomputeSteadyShares(); } + + /** {@inheritDoc} */ + @Override + public EnumSet getSchedulingResourceTypes() { + return EnumSet + .of(SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.CPU); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java index b0ffc859b6..3508a3cf76 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java @@ -18,6 +18,13 @@ package org.apache.hadoop.yarn.server.resourcemanager; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; +import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; +import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; import org.junit.Assert; import org.apache.commons.logging.Log; @@ -40,8 +47,7 @@ import org.junit.BeforeClass; import org.junit.Test; -import java.util.ArrayList; -import java.util.List; +import java.util.*; import static java.lang.Thread.sleep; import static org.mockito.Matchers.any; @@ -259,4 +265,47 @@ public void testFinishApplicationMasterBeforeRegistering() throws Exception { } } } + + @Test(timeout = 3000000) + public void testResourceTypes() throws Exception { + HashMap> driver = + new HashMap>(); + + CapacitySchedulerConfiguration csconf = + new CapacitySchedulerConfiguration(); + csconf.setResourceComparator(DominantResourceCalculator.class); + YarnConfiguration testCapacityDRConf = new YarnConfiguration(csconf); + testCapacityDRConf.setClass(YarnConfiguration.RM_SCHEDULER, + CapacityScheduler.class, ResourceScheduler.class); + YarnConfiguration testCapacityDefConf = new YarnConfiguration(); + testCapacityDefConf.setClass(YarnConfiguration.RM_SCHEDULER, + CapacityScheduler.class, ResourceScheduler.class); + YarnConfiguration testFairDefConf = 
new YarnConfiguration(); + testFairDefConf.setClass(YarnConfiguration.RM_SCHEDULER, + FairScheduler.class, ResourceScheduler.class); + + driver.put(conf, EnumSet.of(SchedulerResourceTypes.MEMORY)); + driver.put(testCapacityDRConf, + EnumSet.of(SchedulerResourceTypes.CPU, SchedulerResourceTypes.MEMORY)); + driver.put(testCapacityDefConf, EnumSet.of(SchedulerResourceTypes.MEMORY)); + driver.put(testFairDefConf, + EnumSet.of(SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.CPU)); + + for (Map.Entry> entry : driver + .entrySet()) { + EnumSet expectedValue = entry.getValue(); + MockRM rm = new MockRM(entry.getKey()); + rm.start(); + MockNM nm1 = rm.registerNode("127.0.0.1:1234", 6 * GB); + RMApp app1 = rm.submitApp(2048); + nm1.nodeHeartbeat(true); + RMAppAttempt attempt1 = app1.getCurrentAppAttempt(); + MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId()); + RegisterApplicationMasterResponse resp = am1.registerAppAttempt(); + EnumSet types = resp.getSchedulerResourceTypes(); + LOG.info("types = " + types.toString()); + Assert.assertEquals(expectedValue, types); + rm.stop(); + } + } } From 47bdfa044aa1d587b24edae8b1b0c796d829c960 Mon Sep 17 00:00:00 2001 From: XUAN Date: Wed, 10 Sep 2014 11:44:41 -0700 Subject: [PATCH 06/13] YARN-2459. RM crashes if App gets rejected for any reason and HA is enabled. Contributed by Jian He --- hadoop-yarn-project/CHANGES.txt | 3 ++ .../server/resourcemanager/RMAppManager.java | 2 +- .../resourcemanager/rmapp/RMAppImpl.java | 6 ++- .../server/resourcemanager/TestRMRestart.java | 50 +++++++++++++++++++ .../rmapp/TestRMAppTransitions.java | 20 +++++++- 5 files changed, 77 insertions(+), 4 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index a840e4f442..2feae4601b 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -311,6 +311,9 @@ Release 2.6.0 - UNRELEASED YARN-1458. FairScheduler: Zero weight can lead to livelock. (Zhihai Xu via kasha) + YARN-2459. RM crashes if App gets rejected for any reason + and HA is enabled. 
(Jian He via xgong) + Release 2.5.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index 51024cfcb8..a789e929d1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -401,7 +401,7 @@ private boolean isApplicationInFinalState(RMAppState rmAppState) { } } - private Credentials parseCredentials(ApplicationSubmissionContext application) + protected Credentials parseCredentials(ApplicationSubmissionContext application) throws IOException { Credentials credentials = new Credentials(); DataInputByteBuffer dibb = new DataInputByteBuffer(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 48cf460299..0b81f96af8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -150,8 +150,10 @@ RMAppEventType.START, new RMAppNewlySavingTransition()) RMAppEventType.RECOVER, new RMAppRecoveredTransition()) .addTransition(RMAppState.NEW, RMAppState.KILLED, RMAppEventType.KILL, new AppKilledTransition()) - .addTransition(RMAppState.NEW, RMAppState.FAILED, - RMAppEventType.APP_REJECTED, new AppRejectedTransition()) + .addTransition(RMAppState.NEW, RMAppState.FINAL_SAVING, + RMAppEventType.APP_REJECTED, + new FinalSavingTransition(new AppRejectedTransition(), + RMAppState.FAILED)) // Transitions from NEW_SAVING state .addTransition(RMAppState.NEW_SAVING, RMAppState.NEW_SAVING, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index 5f63caf9a7..7d511db36f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -65,6 +65,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; 
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -92,6 +93,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; +import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.log4j.Level; @@ -1606,6 +1609,53 @@ public void testClientRetryOnKillingApplication() throws Exception { Assert.assertEquals(2, ((TestMemoryRMStateStore) memStore).updateApp); } + // Test Application that fails on submission is saved in state store. + @Test (timeout = 20000) + public void testAppFailedOnSubmissionSavedInStateStore() throws Exception { + conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, + "kerberos"); + UserGroupInformation.setConfiguration(conf); + MemoryRMStateStore memStore = new MemoryRMStateStore(); + memStore.init(conf); + + MockRM rm1 = new TestSecurityMockRM(conf, memStore) { + @Override + protected RMAppManager createRMAppManager() { + return new TestRMAppManager(this.rmContext, this.scheduler, + this.masterService, this.applicationACLsManager, conf); + } + + class TestRMAppManager extends RMAppManager { + + public TestRMAppManager(RMContext context, YarnScheduler scheduler, + ApplicationMasterService masterService, + ApplicationACLsManager applicationACLsManager, Configuration conf) { + super(context, scheduler, masterService, applicationACLsManager, conf); + } + + @Override + protected Credentials parseCredentials( + ApplicationSubmissionContext application) throws IOException { + throw new IOException("Parsing credential error."); + } + } + }; + rm1.start(); + RMApp app1 = + rm1.submitApp(200, "name", "user", + new HashMap(), false, "default", -1, + null, "MAPREDUCE", false); + rm1.waitForState(app1.getApplicationId(), RMAppState.FAILED); + // Check app staet is saved in state store. + Assert.assertEquals(RMAppState.FAILED, memStore.getState() + .getApplicationState().get(app1.getApplicationId()).getState()); + + MockRM rm2 = new TestSecurityMockRM(conf, memStore); + rm2.start(); + // Restarted RM has the failed app info too. 
+ rm2.waitForState(app1.getApplicationId(), RMAppState.FAILED); + } + @SuppressWarnings("resource") @Test (timeout = 60000) public void testQueueMetricsOnRMRestart() throws Exception { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java index 2fc44319a6..3c871df913 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java @@ -526,10 +526,28 @@ public void testAppNewReject() throws IOException { rmDispatcher.await(); sendAppUpdateSavedEvent(application); assertFailed(application, rejectedText); - assertAppFinalStateNotSaved(application); + assertAppFinalStateSaved(application); verifyApplicationFinished(RMAppState.FAILED); } + @Test (timeout = 30000) + public void testAppNewRejectAddToStore() throws IOException { + LOG.info("--- START: testAppNewRejectAddToStore ---"); + + RMApp application = createNewTestApp(null); + // NEW => FAILED event RMAppEventType.APP_REJECTED + String rejectedText = "Test Application Rejected"; + RMAppEvent event = + new RMAppRejectedEvent(application.getApplicationId(), rejectedText); + application.handle(event); + rmDispatcher.await(); + sendAppUpdateSavedEvent(application); + assertFailed(application, rejectedText); + assertAppFinalStateSaved(application); + verifyApplicationFinished(RMAppState.FAILED); + rmContext.getStateStore().removeApplication(application); + } + @Test (timeout = 30000) public void testAppNewSavingKill() throws IOException { LOG.info("--- START: testAppNewSavingKill ---"); From cbfe26370b85161c79fdd48bf69c95d5725d8f6a Mon Sep 17 00:00:00 2001 From: Jian He Date: Wed, 10 Sep 2014 12:47:34 -0700 Subject: [PATCH 07/13] YARN-2158. Fixed TestRMWebServicesAppsModification#testSingleAppKill test failure. Contributed by Varun Vasudev --- hadoop-yarn-project/CHANGES.txt | 3 ++ .../TestRMWebServicesAppsModification.java | 50 +++++++------------ 2 files changed, 20 insertions(+), 33 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 2feae4601b..cf5ec65e33 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -314,6 +314,9 @@ Release 2.6.0 - UNRELEASED YARN-2459. RM crashes if App gets rejected for any reason and HA is enabled. (Jian He via xgong) + YARN-2158. Fixed TestRMWebServicesAppsModification#testSingleAppKill test + failure. 
(Varun Vasudev via jianhe) + Release 2.5.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesAppsModification.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesAppsModification.java index e02e410c5a..536aa8d1b4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesAppsModification.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesAppsModification.java @@ -45,7 +45,6 @@ import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.authentication.server.AuthenticationFilter; @@ -78,7 +77,6 @@ import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONObject; import org.junit.After; -import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -112,12 +110,12 @@ public class TestRMWebServicesAppsModification extends JerseyTest { private static final int CONTAINER_MB = 1024; - private Injector injector; + private static Injector injector; private String webserviceUserName = "testuser"; private boolean setAuthFilter = false; - public class GuiceServletConfig extends GuiceServletContextListener { + public static class GuiceServletConfig extends GuiceServletContextListener { @Override protected Injector getInjector() { @@ -263,9 +261,9 @@ public void testSingleAppState() throws Exception { .constructWebResource("apps", app.getApplicationId().toString(), "state").accept(mediaType).get(ClientResponse.class); assertEquals(Status.OK, response.getClientResponseStatus()); - if (mediaType == MediaType.APPLICATION_JSON) { + if (mediaType.equals(MediaType.APPLICATION_JSON)) { verifyAppStateJson(response, RMAppState.ACCEPTED); - } else if (mediaType == MediaType.APPLICATION_XML) { + } else if (mediaType.equals(MediaType.APPLICATION_XML)) { verifyAppStateXML(response, RMAppState.ACCEPTED); } } @@ -285,10 +283,6 @@ public void testSingleAppKill() throws Exception { RMApp app = rm.submitApp(CONTAINER_MB, "", webserviceUserName); amNodeManager.nodeHeartbeat(true); - ClientResponse response = - this - .constructWebResource("apps", app.getApplicationId().toString(), - "state").accept(mediaType).get(ClientResponse.class); AppState targetState = new AppState(YarnApplicationState.KILLED.toString()); @@ -298,7 +292,7 @@ public void testSingleAppKill() throws Exception { } else { entity = targetState; } - response = + ClientResponse response = this .constructWebResource("apps", app.getApplicationId().toString(), "state").entity(entity, contentType).accept(mediaType) @@ -309,10 +303,12 @@ public void testSingleAppKill() throws Exception { continue; } assertEquals(Status.ACCEPTED, response.getClientResponseStatus()); - if (mediaType == MediaType.APPLICATION_JSON) { - verifyAppStateJson(response, RMAppState.KILLING, RMAppState.ACCEPTED); + if (mediaType.equals(MediaType.APPLICATION_JSON)) { + 
verifyAppStateJson(response, RMAppState.FINAL_SAVING, + RMAppState.KILLED, RMAppState.KILLING, RMAppState.ACCEPTED); } else { - verifyAppStateXML(response, RMAppState.KILLING, RMAppState.ACCEPTED); + verifyAppStateXML(response, RMAppState.FINAL_SAVING, + RMAppState.KILLED, RMAppState.KILLING, RMAppState.ACCEPTED); } String locationHeaderValue = @@ -338,7 +334,7 @@ public void testSingleAppKill() throws Exception { || (response.getClientResponseStatus() == Status.OK)); if (response.getClientResponseStatus() == Status.OK) { assertEquals(RMAppState.KILLED, app.getState()); - if (mediaType == MediaType.APPLICATION_JSON) { + if (mediaType.equals(MediaType.APPLICATION_JSON)) { verifyAppStateJson(response, RMAppState.KILLED); } else { verifyAppStateXML(response, RMAppState.KILLED); @@ -350,7 +346,6 @@ public void testSingleAppKill() throws Exception { } rm.stop(); - return; } @Test @@ -396,7 +391,6 @@ public void testSingleAppKillInvalidState() throws Exception { } rm.stop(); - return; } private static String appStateToJSON(AppState state) throws Exception { @@ -422,7 +416,6 @@ protected static void verifyAppStateJson(ClientResponse response, } String msg = "app state incorrect, got " + responseState; assertTrue(msg, valid); - return; } protected static void verifyAppStateXML(ClientResponse response, @@ -447,7 +440,6 @@ protected static void verifyAppStateXML(ClientResponse response, } String msg = "app state incorrect, got " + state; assertTrue(msg, valid); - return; } @Test(timeout = 30000) @@ -487,7 +479,6 @@ public void testSingleAppKillUnauthorized() throws Exception { validateResponseStatus(response, Status.FORBIDDEN); } rm.stop(); - return; } @@ -510,7 +501,6 @@ public void testSingleAppKillInvalidId() throws Exception { assertEquals(Status.NOT_FOUND, response.getClientResponseStatus()); } rm.stop(); - return; } @After @@ -571,7 +561,6 @@ public void testGetNewApplication() throws Exception { testGetNewApplication(acceptMedia); } rm.stop(); - return; } protected String testGetNewApplication(String mediaType) throws JSONException, @@ -606,7 +595,7 @@ protected String validateGetNewApplicationResponse(ClientResponse resp) protected String validateGetNewApplicationJsonResponse(JSONObject json) throws JSONException { String appId = json.getString("application-id"); - assertTrue(appId.isEmpty() == false); + assertTrue(!appId.isEmpty()); JSONObject maxResources = json.getJSONObject("maximum-resource-capability"); long memory = maxResources.getLong("memory"); long vCores = maxResources.getLong("vCores"); @@ -626,7 +615,7 @@ protected String validateGetNewApplicationXMLResponse(String response) assertEquals("incorrect number of elements", 1, nodes.getLength()); Element element = (Element) nodes.item(0); String appId = WebServicesTestUtils.getXmlString(element, "application-id"); - assertTrue(appId.isEmpty() == false); + assertTrue(!appId.isEmpty()); NodeList maxResourceNodes = element.getElementsByTagName("maximum-resource-capability"); assertEquals(1, maxResourceNodes.getLength()); @@ -656,7 +645,6 @@ public void testGetNewApplicationAndSubmit() throws Exception { } } rm.stop(); - return; } public void testAppSubmit(String acceptMedia, String contentMedia) @@ -721,14 +709,14 @@ public void testAppSubmit(String acceptMedia, String contentMedia) this.constructWebResource(urlPath).accept(acceptMedia) .entity(appInfo, contentMedia).post(ClientResponse.class); - if (this.isAuthenticationEnabled() == false) { + if (!this.isAuthenticationEnabled()) { assertEquals(Status.UNAUTHORIZED, 
response.getClientResponseStatus()); return; } assertEquals(Status.ACCEPTED, response.getClientResponseStatus()); - assertTrue(response.getHeaders().getFirst(HttpHeaders.LOCATION).isEmpty() == false); + assertTrue(!response.getHeaders().getFirst(HttpHeaders.LOCATION).isEmpty()); String locURL = response.getHeaders().getFirst(HttpHeaders.LOCATION); - assertTrue(locURL.indexOf("/apps/application") != -1); + assertTrue(locURL.contains("/apps/application")); appId = locURL.substring(locURL.indexOf("/apps/") + "/apps/".length()); WebResource res = resource().uri(new URI(locURL)); @@ -775,7 +763,6 @@ public void testAppSubmit(String acceptMedia, String contentMedia) this.constructWebResource("apps", appId).accept(acceptMedia) .get(ClientResponse.class); assertEquals(Status.OK, response.getClientResponseStatus()); - return; } public void testAppSubmitErrors(String acceptMedia, String contentMedia) @@ -785,14 +772,13 @@ public void testAppSubmitErrors(String acceptMedia, String contentMedia) // REST API and make sure we get the right error response codes String urlPath = "apps"; - String appId = ""; ApplicationSubmissionContextInfo appInfo = new ApplicationSubmissionContextInfo(); ClientResponse response = this.constructWebResource(urlPath).accept(acceptMedia) .entity(appInfo, contentMedia).post(ClientResponse.class); validateResponseStatus(response, Status.BAD_REQUEST); - appId = "random"; + String appId = "random"; appInfo.setApplicationId(appId); response = this.constructWebResource(urlPath).accept(acceptMedia) @@ -827,8 +813,6 @@ public void testAppSubmitErrors(String acceptMedia, String contentMedia) this.constructWebResource(urlPath).accept(acceptMedia) .entity(appInfo, contentMedia).post(ClientResponse.class); validateResponseStatus(response, Status.BAD_REQUEST); - - return; } @Test From b02a4b40610e93eef6559db09a11d287e859446d Mon Sep 17 00:00:00 2001 From: Alejandro Abdelnur Date: Wed, 10 Sep 2014 14:26:15 -0700 Subject: [PATCH 08/13] HADOOP-10758. KMS: add ACLs on per key basis. (tucu) --- .../hadoop-common/CHANGES.txt | 2 + .../hadoop-kms/src/main/conf/kms-acls.xml | 38 +++ .../hadoop/crypto/key/kms/server/KMSACLs.java | 97 +++++- .../key/kms/server/KMSConfiguration.java | 9 + .../crypto/key/kms/server/KMSWebApp.java | 17 +- .../server/KeyAuthorizationKeyProvider.java | 276 ++++++++++++++++++ .../hadoop-kms/src/site/apt/index.apt.vm | 106 +++++++ .../hadoop/crypto/key/kms/server/TestKMS.java | 236 ++++++++++++++- .../TestKeyAuthorizationKeyProvider.java | 218 ++++++++++++++ 9 files changed, 986 insertions(+), 13 deletions(-) create mode 100644 hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KeyAuthorizationKeyProvider.java create mode 100644 hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKeyAuthorizationKeyProvider.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index b2157d6f9e..3cea14a317 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -509,6 +509,8 @@ Release 2.6.0 - UNRELEASED HADOOP-11057. checknative command to probe for winutils.exe on windows. (Xiaoyu Yao via cnauroth) + HADOOP-10758. KMS: add ACLs on per key basis. (tucu) + OPTIMIZATIONS HADOOP-10838. Byte array native checksumming. 
(James Thomas via todd) diff --git a/hadoop-common-project/hadoop-kms/src/main/conf/kms-acls.xml b/hadoop-common-project/hadoop-kms/src/main/conf/kms-acls.xml index cdff629128..24a46b86ec 100644 --- a/hadoop-common-project/hadoop-kms/src/main/conf/kms-acls.xml +++ b/hadoop-common-project/hadoop-kms/src/main/conf/kms-acls.xml @@ -94,4 +94,42 @@ ACL for decrypt EncryptedKey CryptoExtension operations + + + default.key.acl.MANAGEMENT + * + + default ACL for MANAGEMENT operations for all key acls that are not + explicitly defined. + + + + + default.key.acl.GENERATE_EEK + * + + default ACL for GENERATE_EEK operations for all key acls that are not + explicitly defined. + + + + + default.key.acl.DECRYPT_EEK + * + + default ACL for DECRYPT_EEK operations for all key acls that are not + explicitly defined. + + + + + default.key.acl.READ + * + + default ACL for READ operations for all key acls that are not + explicitly defined. + + + + diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSACLs.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSACLs.java index 8a10bb2be9..530fe1102b 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSACLs.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSACLs.java @@ -20,6 +20,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.kms.server.KMS.KMSOp; +import org.apache.hadoop.crypto.key.kms.server.KeyAuthorizationKeyProvider.KeyACLs; +import org.apache.hadoop.crypto.key.kms.server.KeyAuthorizationKeyProvider.KeyOpType; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; @@ -32,6 +34,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.regex.Pattern; /** * Provides access to the AccessControlLists used by KMS, @@ -39,7 +42,7 @@ * are defined has been updated. */ @InterfaceAudience.Private -public class KMSACLs implements Runnable { +public class KMSACLs implements Runnable, KeyACLs { private static final Logger LOG = LoggerFactory.getLogger(KMSACLs.class); private static final String UNAUTHORIZED_MSG_WITH_KEY = @@ -67,6 +70,9 @@ public String getBlacklistConfigKey() { private volatile Map acls; private volatile Map blacklistedAcls; + private volatile Map> keyAcls; + private final Map defaultKeyAcls = + new HashMap(); private ScheduledExecutorService executorService; private long lastReload; @@ -74,14 +80,15 @@ public String getBlacklistConfigKey() { if (conf == null) { conf = loadACLs(); } - setACLs(conf); + setKMSACLs(conf); + setKeyACLs(conf); } public KMSACLs() { this(null); } - private void setACLs(Configuration conf) { + private void setKMSACLs(Configuration conf) { Map tempAcls = new HashMap(); Map tempBlacklist = new HashMap(); for (Type aclType : Type.values()) { @@ -99,14 +106,69 @@ private void setACLs(Configuration conf) { blacklistedAcls = tempBlacklist; } + private void setKeyACLs(Configuration conf) { + Map> tempKeyAcls = + new HashMap>(); + Map allKeyACLS = + conf.getValByRegex(Pattern.quote(KMSConfiguration.KEY_ACL_PREFIX)); + for (Map.Entry keyAcl : allKeyACLS.entrySet()) { + String k = keyAcl.getKey(); + // this should be of type "key.acl.." 
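+ // i.e. entries of the form "key.acl.<key name>.<OP>", where <OP> is one
+ // of the KeyOpType values (MANAGEMENT, GENERATE_EEK, DECRYPT_EEK, READ,
+ // ALL); the key name is everything between the prefix and the last '.'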
+ int keyNameStarts = KMSConfiguration.KEY_ACL_PREFIX.length(); + int keyNameEnds = k.lastIndexOf("."); + if (keyNameStarts >= keyNameEnds) { + LOG.warn("Invalid key name '{}'", k); + } else { + String aclStr = keyAcl.getValue(); + String keyName = k.substring(keyNameStarts, keyNameEnds); + String keyOp = k.substring(keyNameEnds + 1); + KeyOpType aclType = null; + try { + aclType = KeyOpType.valueOf(keyOp); + } catch (IllegalArgumentException e) { + LOG.warn("Invalid key Operation '{}'", keyOp); + } + if (aclType != null) { + // On the assumption this will be single threaded.. else we need to + // ConcurrentHashMap + HashMap aclMap = + tempKeyAcls.get(keyName); + if (aclMap == null) { + aclMap = new HashMap(); + tempKeyAcls.put(keyName, aclMap); + } + aclMap.put(aclType, new AccessControlList(aclStr)); + LOG.info("KEY_NAME '{}' KEY_OP '{}' ACL '{}'", + keyName, aclType, aclStr); + } + } + } + + keyAcls = tempKeyAcls; + for (KeyOpType keyOp : KeyOpType.values()) { + if (!defaultKeyAcls.containsKey(keyOp)) { + String confKey = KMSConfiguration.DEFAULT_KEY_ACL_PREFIX + keyOp; + String aclStr = conf.get(confKey); + if (aclStr != null) { + if (aclStr.equals("*")) { + LOG.info("Default Key ACL for KEY_OP '{}' is set to '*'", keyOp); + } + defaultKeyAcls.put(keyOp, new AccessControlList(aclStr)); + } + } + } + } + @Override public void run() { try { if (KMSConfiguration.isACLsFileNewer(lastReload)) { - setACLs(loadACLs()); + setKMSACLs(loadACLs()); + setKeyACLs(loadACLs()); } } catch (Exception ex) { - LOG.warn("Could not reload ACLs file: " + ex.toString(), ex); + LOG.warn( + String.format("Could not reload ACLs file: '%s'", ex.toString()), ex); } } @@ -164,4 +226,29 @@ public void assertAccess(KMSACLs.Type aclType, } } + @Override + public boolean hasAccessToKey(String keyName, UserGroupInformation ugi, + KeyOpType opType) { + Map keyAcl = keyAcls.get(keyName); + if (keyAcl == null) { + // Get KeyAcl map of DEFAULT KEY. 
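+ // defaultKeyAcls is built from the "default.key.acl.<OP>" entries, so a
+ // key without an explicit "key.acl.<key name>.<OP>" ACL falls back to the
+ // per-operation defaults configured in kms-acls.xml.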
+ keyAcl = defaultKeyAcls; + } + // If No key acl defined for this key, check to see if + // there are key defaults configured for this operation + AccessControlList acl = keyAcl.get(opType); + if (acl == null) { + // If no acl is specified for this operation, + // deny access + return false; + } else { + return acl.isUserAllowed(ugi); + } + } + + @Override + public boolean isACLPresent(String keyName, KeyOpType opType) { + return (keyAcls.containsKey(keyName) || defaultKeyAcls.containsKey(opType)); + } + } diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java index 35dccfc489..a7daa24692 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java @@ -36,6 +36,9 @@ public class KMSConfiguration { public static final String CONFIG_PREFIX = "hadoop.kms."; + public static final String KEY_ACL_PREFIX = "key.acl."; + public static final String DEFAULT_KEY_ACL_PREFIX = "default.key.acl."; + // Property to Enable/Disable Caching public static final String KEY_CACHE_ENABLE = CONFIG_PREFIX + "cache.enable"; @@ -57,6 +60,12 @@ public class KMSConfiguration { // 10 secs public static final long KMS_AUDIT_AGGREGATION_DELAY_DEFAULT = 10000; + // Property to Enable/Disable per Key authorization + public static final String KEY_AUTHORIZATION_ENABLE = CONFIG_PREFIX + + "key.authorization.enable"; + + public static final boolean KEY_AUTHORIZATION_ENABLE_DEFAULT = true; + static Configuration getConfiguration(boolean loadHadoopDefaults, String ... 
resources) { Configuration conf = new Configuration(loadHadoopDefaults); diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java index aaf90e8cff..0827b78286 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java @@ -68,7 +68,7 @@ public class KMSWebApp implements ServletContextListener { private JmxReporter jmxReporter; private static Configuration kmsConf; - private static KMSACLs acls; + private static KMSACLs kmsAcls; private static Meter adminCallsMeter; private static Meter keyCallsMeter; private static Meter unauthorizedCallsMeter; @@ -126,8 +126,8 @@ public void contextInitialized(ServletContextEvent sce) { LOG.info(" KMS Hadoop Version: " + VersionInfo.getVersion()); LOG.info("-------------------------------------------------------------"); - acls = new KMSACLs(); - acls.startReloader(); + kmsAcls = new KMSACLs(); + kmsAcls.startReloader(); metricRegistry = new MetricRegistry(); jmxReporter = JmxReporter.forRegistry(metricRegistry).build(); @@ -188,6 +188,13 @@ public void contextInitialized(ServletContextEvent sce) { keyProviderCryptoExtension = new EagerKeyGeneratorKeyProviderCryptoExtension(kmsConf, keyProviderCryptoExtension); + if (kmsConf.getBoolean(KMSConfiguration.KEY_AUTHORIZATION_ENABLE, + KMSConfiguration.KEY_AUTHORIZATION_ENABLE_DEFAULT)) { + keyProviderCryptoExtension = + new KeyAuthorizationKeyProvider( + keyProviderCryptoExtension, kmsAcls); + } + LOG.info("Initialized KeyProviderCryptoExtension " + keyProviderCryptoExtension); final int defaultBitlength = kmsConf @@ -213,7 +220,7 @@ public void contextInitialized(ServletContextEvent sce) { @Override public void contextDestroyed(ServletContextEvent sce) { kmsAudit.shutdown(); - acls.stopReloader(); + kmsAcls.stopReloader(); jmxReporter.stop(); jmxReporter.close(); metricRegistry = null; @@ -225,7 +232,7 @@ public static Configuration getConfiguration() { } public static KMSACLs getACLs() { - return acls; + return kmsAcls; } public static Meter getAdminCallsMeter() { diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KeyAuthorizationKeyProvider.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KeyAuthorizationKeyProvider.java new file mode 100644 index 0000000000..fe908e38c9 --- /dev/null +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KeyAuthorizationKeyProvider.java @@ -0,0 +1,276 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.crypto.key.kms.server; + +import java.io.IOException; +import java.security.GeneralSecurityException; +import java.security.NoSuchAlgorithmException; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.crypto.key.KeyProvider; +import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authorize.AuthorizationException; + +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import com.google.common.collect.ImmutableMap; + +/** + * A {@link KeyProvider} proxy that checks whether the current user derived via + * {@link UserGroupInformation}, is authorized to perform the following + * type of operations on a Key : + *
+ *   1. MANAGEMENT operations : createKey, rollNewVersion, deleteKey
+ *   2. GENERATE_EEK operations : generateEncryptedKey, warmUpEncryptedKeys
+ *   3. DECRYPT_EEK operation : decryptEncryptedKey
+ *   4. READ operations : getKeyVersion, getKeyVersions, getMetadata,
+ *      getKeysMetadata, getCurrentKey
+ * The read operations (getCurrentKeyVersion / getMetadata) etc are not checked. + */ +public class KeyAuthorizationKeyProvider extends KeyProviderCryptoExtension { + + public static final String KEY_ACL = "key.acl."; + private static final String KEY_ACL_NAME = KEY_ACL + "name"; + + public enum KeyOpType { + ALL, READ, MANAGEMENT, GENERATE_EEK, DECRYPT_EEK; + } + + /** + * Interface that needs to be implemented by a client of the + * KeyAuthorizationKeyProvider. + */ + public static interface KeyACLs { + + /** + * This is called by the KeyProvider to check if the given user is + * authorized to perform the specified operation on the given acl name. + * @param aclName name of the key ACL + * @param ugi User's UserGroupInformation + * @param opType Operation Type + * @return true if user has access to the aclName and opType else false + */ + public boolean hasAccessToKey(String aclName, UserGroupInformation ugi, + KeyOpType opType); + + /** + * + * @param aclName ACL name + * @param opType Operation Type + * @return true if AclName exists else false + */ + public boolean isACLPresent(String aclName, KeyOpType opType); + } + + private final KeyProviderCryptoExtension provider; + private final KeyACLs acls; + + /** + * The constructor takes a {@link KeyProviderCryptoExtension} and an + * implementation of KeyACLs. All calls are delegated to the + * provider keyProvider after authorization check (if required) + * @param keyProvider + * @param acls + */ + public KeyAuthorizationKeyProvider(KeyProviderCryptoExtension keyProvider, + KeyACLs acls) { + super(keyProvider, null); + this.provider = keyProvider; + this.acls = acls; + } + + // This method first checks if "key.acl.name" attribute is present as an + // attribute in the provider Options. If yes, use the aclName for any + // subsequent access checks, else use the keyName as the aclName and set it + // as the value of the "key.acl.name" in the key's metadata. + private void authorizeCreateKey(String keyName, Options options, + UserGroupInformation ugi) throws IOException{ + Preconditions.checkNotNull(ugi, "UserGroupInformation cannot be null"); + Map attributes = options.getAttributes(); + String aclName = attributes.get(KEY_ACL_NAME); + boolean success = false; + if (Strings.isNullOrEmpty(aclName)) { + if (acls.isACLPresent(keyName, KeyOpType.MANAGEMENT)) { + options.setAttributes(ImmutableMap. 
builder() + .putAll(attributes).put(KEY_ACL_NAME, keyName).build()); + success = + acls.hasAccessToKey(keyName, ugi, KeyOpType.MANAGEMENT) + || acls.hasAccessToKey(keyName, ugi, KeyOpType.ALL); + } else { + success = false; + } + } else { + success = acls.isACLPresent(aclName, KeyOpType.MANAGEMENT) && + (acls.hasAccessToKey(aclName, ugi, KeyOpType.MANAGEMENT) + || acls.hasAccessToKey(aclName, ugi, KeyOpType.ALL)); + } + if (!success) + throw new AuthorizationException(String.format("User [%s] is not" + + " authorized to create key !!", ugi.getShortUserName())); + } + + private void checkAccess(String aclName, UserGroupInformation ugi, + KeyOpType opType) throws AuthorizationException { + Preconditions.checkNotNull(aclName, "Key ACL name cannot be null"); + Preconditions.checkNotNull(ugi, "UserGroupInformation cannot be null"); + if (acls.isACLPresent(aclName, KeyOpType.MANAGEMENT) && + (acls.hasAccessToKey(aclName, ugi, opType) + || acls.hasAccessToKey(aclName, ugi, KeyOpType.ALL))) { + return; + } else { + throw new AuthorizationException(String.format("User [%s] is not" + + " authorized to perform [%s] on key with ACL name [%s]!!", + ugi.getShortUserName(), opType, aclName)); + } + } + + @Override + public KeyVersion createKey(String name, Options options) + throws NoSuchAlgorithmException, IOException { + authorizeCreateKey(name, options, getUser()); + return provider.createKey(name, options); + } + + @Override + public KeyVersion createKey(String name, byte[] material, Options options) + throws IOException { + authorizeCreateKey(name, options, getUser()); + return provider.createKey(name, material, options); + } + + @Override + public KeyVersion rollNewVersion(String name) + throws NoSuchAlgorithmException, IOException { + doAccessCheck(name, KeyOpType.MANAGEMENT); + return provider.rollNewVersion(name); + } + + @Override + public void deleteKey(String name) throws IOException { + doAccessCheck(name, KeyOpType.MANAGEMENT); + provider.deleteKey(name); + } + + @Override + public KeyVersion rollNewVersion(String name, byte[] material) + throws IOException { + doAccessCheck(name, KeyOpType.MANAGEMENT); + return provider.rollNewVersion(name, material); + } + + @Override + public void warmUpEncryptedKeys(String... 
names) throws IOException { + for (String name : names) { + doAccessCheck(name, KeyOpType.GENERATE_EEK); + } + provider.warmUpEncryptedKeys(names); + } + + @Override + public EncryptedKeyVersion generateEncryptedKey(String encryptionKeyName) + throws IOException, GeneralSecurityException { + doAccessCheck(encryptionKeyName, KeyOpType.GENERATE_EEK); + return provider.generateEncryptedKey(encryptionKeyName); + } + + @Override + public KeyVersion decryptEncryptedKey(EncryptedKeyVersion encryptedKeyVersion) + throws IOException, GeneralSecurityException { + doAccessCheck( + encryptedKeyVersion.getEncryptionKeyName(), KeyOpType.DECRYPT_EEK); + return provider.decryptEncryptedKey(encryptedKeyVersion); + } + + @Override + public KeyVersion getKeyVersion(String versionName) throws IOException { + KeyVersion keyVersion = provider.getKeyVersion(versionName); + if (keyVersion != null) { + doAccessCheck(keyVersion.getName(), KeyOpType.READ); + } + return keyVersion; + } + + @Override + public List getKeys() throws IOException { + return provider.getKeys(); + } + + @Override + public List getKeyVersions(String name) throws IOException { + doAccessCheck(name, KeyOpType.READ); + return provider.getKeyVersions(name); + } + + @Override + public Metadata getMetadata(String name) throws IOException { + doAccessCheck(name, KeyOpType.READ); + return provider.getMetadata(name); + } + + @Override + public Metadata[] getKeysMetadata(String... names) throws IOException { + for (String name : names) { + doAccessCheck(name, KeyOpType.READ); + } + return provider.getKeysMetadata(names); + } + + @Override + public KeyVersion getCurrentKey(String name) throws IOException { + doAccessCheck(name, KeyOpType.READ); + return provider.getCurrentKey(name); + } + + @Override + public void flush() throws IOException { + provider.flush(); + } + + @Override + public boolean isTransient() { + return provider.isTransient(); + } + + private void doAccessCheck(String keyName, KeyOpType opType) throws + IOException { + Metadata metadata = provider.getMetadata(keyName); + if (metadata != null) { + String aclName = metadata.getAttributes().get(KEY_ACL_NAME); + checkAccess((aclName == null) ? keyName : aclName, getUser(), opType); + } + } + + private UserGroupInformation getUser() throws IOException { + return UserGroupInformation.getCurrentUser(); + } + + @Override + protected KeyProvider getKeyProvider() { + return this; + } + + @Override + public String toString() { + return provider.toString(); + } + +} diff --git a/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm b/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm index e947c9b398..c76ca3b30a 100644 --- a/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm +++ b/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm @@ -443,6 +443,112 @@ $ keytool -genkey -alias tomcat -keyalg RSA +---+ +*** Key Access Control + + KMS supports access control for all non-read operations at the Key level. + All Key Access operations are classified as : + + * MANAGEMENT - createKey, deleteKey, rolloverNewVersion + + * GENERATE_EEK - generateEncryptedKey, warmUpEncryptedKeys + + * DECRYPT_EEK - decryptEncryptedKey; + + * READ - getKeyVersion, getKeyVersions, getMetadata, getKeysMetadata, + getCurrentKey; + + * ALL - all of the above; + + These can be defined in the KMS <<>> as follows + + For all keys for which a key access has not been explicitly configured, It + is possible to configure a default key access control for a subset of the + operation types. 
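
  As an illustration only (a minimal sketch, not part of this patch): the ACL used for these
  per-key checks can also be decoupled from the key name by setting the optional
  "key.acl.name" attribute at key-creation time, as the tests in this patch do. The sketch
  below assumes a placeholder KMS URI; the key name "reports-key" and ACL name
  "project-keys" are hypothetical. Every key tagged with that ACL name is then authorized
  against the key.acl.project-keys.* entries instead of ACLs named after the key itself.

  import java.net.URI;
  import java.util.HashMap;
  import java.util.Map;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.crypto.key.KeyProvider;
  import org.apache.hadoop.crypto.key.kms.KMSClientProvider;

  public class SharedKeyAclExample {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      // Placeholder KMS endpoint; replace with the real address.
      KeyProvider kp = new KMSClientProvider(
          new URI("kms://http@kms.example.com:16000/kms"), conf);

      KeyProvider.Options options = new KeyProvider.Options(conf);
      options.setCipher("AES/CTR/NoPadding");
      options.setBitLength(128);

      // Tag the key with a shared ACL name (same pattern as the tests in
      // this patch, which copy getAttributes() into a new map first).
      Map<String, String> attrs =
          new HashMap<String, String>(options.getAttributes());
      attrs.put("key.acl.name", "project-keys");
      options.setAttributes(attrs);

      kp.createKey("reports-key", options);
    }
  }
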
+ + If no ACL is configured for a specific key AND no default ACL is configured + for the requested operation, then access will be DENIED. + + <> The default ACL does not support <<>> operation qualifier. + ++---+ + + key.acl.testKey1.MANAGEMENT + * + + ACL for create-key, deleteKey and rolloverNewVersion operations. + + + + + key.acl.testKey2.GENERATE_EEK + * + + ACL for generateEncryptedKey operations. + + + + + key.acl.testKey3.DECRYPT_EEK + * + + ACL for decryptEncryptedKey operations. + + + + + key.acl.testKey4.READ + * + + ACL for getKeyVersion, getKeyVersions, getMetadata, getKeysMetadata, + getCurrentKey operations + + + + + key.acl.testKey5.ALL + * + + ACL for ALL operations. + + + + + default.key.acl.MANAGEMENT + user1,user2 + + default ACL for MANAGEMENT operations for all keys that are not + explicitly defined. + + + + + default.key.acl.GENERATE_EEK + user1,user2 + + default ACL for GENERATE_EEK operations for all keys that are not + explicitly defined. + + + + + default.key.acl.DECRYPT_EEK + user1,user2 + + default ACL for DECRYPT_EEK operations for all keys that are not + explicitly defined. + + + + + default.key.acl.READ + user1,user2 + + default ACL for READ operations for all keys that are not + explicitly defined. + + ++---+ + ** KMS Delegation Token Configuration KMS delegation token secret manager can be configured with the following diff --git a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java index 74eab5cdfe..1ca0c0dc5b 100644 --- a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java +++ b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java @@ -18,8 +18,10 @@ package org.apache.hadoop.crypto.key.kms.server; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.crypto.key.kms.server.KeyAuthorizationKeyProvider; import org.apache.hadoop.crypto.key.KeyProvider; import org.apache.hadoop.crypto.key.KeyProvider.KeyVersion; +import org.apache.hadoop.crypto.key.KeyProvider.Options; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; import org.apache.hadoop.crypto.key.KeyProviderDelegationTokenExtension; @@ -338,6 +340,13 @@ public void testKMSProvider() throws Exception { UserGroupInformation.setConfiguration(conf); File confDir = getTestDir(); conf = createBaseKMSConf(confDir); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "k1.ALL", "*"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "k2.MANAGEMENT", "*"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "k2.READ", "*"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "k3.ALL", "*"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "k4.ALL", "*"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "k5.ALL", "*"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "k6.ALL", "*"); writeConf(confDir, conf); runServer(null, null, confDir, new KMSCallable() { @@ -492,10 +501,20 @@ public Void call() throws Exception { options = new KeyProvider.Options(conf); options.setCipher("AES/CTR/NoPadding"); options.setBitLength(128); - kp.createKey("k2", options); + KeyVersion kVer2 = kp.createKey("k2", options); KeyProvider.Metadata meta = kp.getMetadata("k2"); Assert.assertNull(meta.getDescription()); - Assert.assertTrue(meta.getAttributes().isEmpty()); + 
Assert.assertEquals("k2", meta.getAttributes().get("key.acl.name")); + + // test key ACL.. k2 is granted only MANAGEMENT Op access + try { + kpExt = + KeyProviderCryptoExtension.createKeyProviderCryptoExtension(kp); + kpExt.generateEncryptedKey(kVer2.getName()); + Assert.fail("User should not be allowed to encrypt !!"); + } catch (Exception ex) { + // + } // createKey() description, no tags options = new KeyProvider.Options(conf); @@ -505,7 +524,7 @@ public Void call() throws Exception { kp.createKey("k3", options); meta = kp.getMetadata("k3"); Assert.assertEquals("d", meta.getDescription()); - Assert.assertTrue(meta.getAttributes().isEmpty()); + Assert.assertEquals("k3", meta.getAttributes().get("key.acl.name")); Map attributes = new HashMap(); attributes.put("a", "A"); @@ -514,6 +533,7 @@ public Void call() throws Exception { options = new KeyProvider.Options(conf); options.setCipher("AES/CTR/NoPadding"); options.setBitLength(128); + attributes.put("key.acl.name", "k4"); options.setAttributes(attributes); kp.createKey("k4", options); meta = kp.getMetadata("k4"); @@ -525,6 +545,7 @@ public Void call() throws Exception { options.setCipher("AES/CTR/NoPadding"); options.setBitLength(128); options.setDescription("d"); + attributes.put("key.acl.name", "k5"); options.setAttributes(attributes); kp.createKey("k5", options); meta = kp.getMetadata("k5"); @@ -564,6 +585,201 @@ public Void call() throws Exception { }); } + @Test + public void testKeyACLs() throws Exception { + Configuration conf = new Configuration(); + conf.set("hadoop.security.authentication", "kerberos"); + UserGroupInformation.setConfiguration(conf); + final File testDir = getTestDir(); + conf = createBaseKMSConf(testDir); + conf.set("hadoop.kms.authentication.type", "kerberos"); + conf.set("hadoop.kms.authentication.kerberos.keytab", + keytab.getAbsolutePath()); + conf.set("hadoop.kms.authentication.kerberos.principal", "HTTP/localhost"); + conf.set("hadoop.kms.authentication.kerberos.name.rules", "DEFAULT"); + + for (KMSACLs.Type type : KMSACLs.Type.values()) { + conf.set(type.getAclConfigKey(), type.toString()); + } + conf.set(KMSACLs.Type.CREATE.getAclConfigKey(),"CREATE,ROLLOVER,GET,SET_KEY_MATERIAL,GENERATE_EEK"); + conf.set(KMSACLs.Type.ROLLOVER.getAclConfigKey(),"CREATE,ROLLOVER,GET,SET_KEY_MATERIAL,GENERATE_EEK"); + conf.set(KMSACLs.Type.GENERATE_EEK.getAclConfigKey(),"CREATE,ROLLOVER,GET,SET_KEY_MATERIAL,GENERATE_EEK"); + conf.set(KMSACLs.Type.DECRYPT_EEK.getAclConfigKey(),"CREATE,ROLLOVER,GET,SET_KEY_MATERIAL,GENERATE_EEK"); + + + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "test_key.MANAGEMENT", "CREATE"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "all_access.ALL", "GENERATE_EEK"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "all_access.DECRYPT_EEK", "ROLLOVER"); + conf.set(KMSConfiguration.DEFAULT_KEY_ACL_PREFIX + "MANAGEMENT", "ROLLOVER"); + + writeConf(testDir, conf); + + runServer(null, null, testDir, new KMSCallable() { + + @Override + public Void call() throws Exception { + final Configuration conf = new Configuration(); + conf.setInt(KeyProvider.DEFAULT_BITLENGTH_NAME, 128); + final URI uri = createKMSUri(getKMSUrl()); + + doAs("CREATE", new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + KeyProvider kp = new KMSClientProvider(uri, conf); + try { + Options options = new KeyProvider.Options(conf); + Map attributes = options.getAttributes(); + HashMap newAttribs = new HashMap(attributes); + newAttribs.put("key.acl.name", "test_key"); + 
options.setAttributes(newAttribs); + KeyProvider.KeyVersion kv = kp.createKey("k0", options); + Assert.assertNull(kv.getMaterial()); + KeyVersion rollVersion = kp.rollNewVersion("k0"); + Assert.assertNull(rollVersion.getMaterial()); + KeyProviderCryptoExtension kpce = + KeyProviderCryptoExtension.createKeyProviderCryptoExtension(kp); + try { + kpce.generateEncryptedKey("k0"); + Assert.fail("User [CREATE] should not be allowed to generate_eek on k0"); + } catch (Exception e) { + // Ignore + } + newAttribs = new HashMap(attributes); + newAttribs.put("key.acl.name", "all_access"); + options.setAttributes(newAttribs); + try { + kp.createKey("kx", options); + Assert.fail("User [CREATE] should not be allowed to create kx"); + } catch (Exception e) { + // Ignore + } + } catch (Exception ex) { + Assert.fail(ex.getMessage()); + } + return null; + } + }); + + doAs("ROLLOVER", new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + KeyProvider kp = new KMSClientProvider(uri, conf); + try { + Options options = new KeyProvider.Options(conf); + Map attributes = options.getAttributes(); + HashMap newAttribs = new HashMap(attributes); + newAttribs.put("key.acl.name", "test_key2"); + options.setAttributes(newAttribs); + KeyProvider.KeyVersion kv = kp.createKey("k1", options); + Assert.assertNull(kv.getMaterial()); + KeyVersion rollVersion = kp.rollNewVersion("k1"); + Assert.assertNull(rollVersion.getMaterial()); + try { + kp.rollNewVersion("k0"); + Assert.fail("User [ROLLOVER] should not be allowed to rollover k0"); + } catch (Exception e) { + // Ignore + } + KeyProviderCryptoExtension kpce = + KeyProviderCryptoExtension.createKeyProviderCryptoExtension(kp); + try { + kpce.generateEncryptedKey("k1"); + Assert.fail("User [ROLLOVER] should not be allowed to generate_eek on k1"); + } catch (Exception e) { + // Ignore + } + newAttribs = new HashMap(attributes); + newAttribs.put("key.acl.name", "all_access"); + options.setAttributes(newAttribs); + try { + kp.createKey("kx", options); + Assert.fail("User [ROLLOVER] should not be allowed to create kx"); + } catch (Exception e) { + // Ignore + } + } catch (Exception ex) { + Assert.fail(ex.getMessage()); + } + return null; + } + }); + + doAs("GET", new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + KeyProvider kp = new KMSClientProvider(uri, conf); + try { + Options options = new KeyProvider.Options(conf); + Map attributes = options.getAttributes(); + HashMap newAttribs = new HashMap(attributes); + newAttribs.put("key.acl.name", "test_key"); + options.setAttributes(newAttribs); + try { + kp.createKey("k2", options); + Assert.fail("User [GET] should not be allowed to create key.."); + } catch (Exception e) { + // Ignore + } + newAttribs = new HashMap(attributes); + newAttribs.put("key.acl.name", "all_access"); + options.setAttributes(newAttribs); + try { + kp.createKey("kx", options); + Assert.fail("User [GET] should not be allowed to create kx"); + } catch (Exception e) { + // Ignore + } + } catch (Exception ex) { + Assert.fail(ex.getMessage()); + } + return null; + } + }); + + final EncryptedKeyVersion ekv = doAs("GENERATE_EEK", new PrivilegedExceptionAction() { + @Override + public EncryptedKeyVersion run() throws Exception { + KeyProvider kp = new KMSClientProvider(uri, conf); + try { + Options options = new KeyProvider.Options(conf); + Map attributes = options.getAttributes(); + HashMap newAttribs = new HashMap(attributes); + newAttribs.put("key.acl.name", "all_access"); + 
options.setAttributes(newAttribs); + kp.createKey("kx", options); + KeyProviderCryptoExtension kpce = + KeyProviderCryptoExtension.createKeyProviderCryptoExtension(kp); + try { + return kpce.generateEncryptedKey("kx"); + } catch (Exception e) { + Assert.fail("User [GENERATE_EEK] should be allowed to generate_eek on kx"); + } + } catch (Exception ex) { + Assert.fail(ex.getMessage()); + } + return null; + } + }); + + doAs("ROLLOVER", new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + KeyProvider kp = new KMSClientProvider(uri, conf); + try { + KeyProviderCryptoExtension kpce = + KeyProviderCryptoExtension.createKeyProviderCryptoExtension(kp); + kpce.decryptEncryptedKey(ekv); + } catch (Exception ex) { + Assert.fail(ex.getMessage()); + } + return null; + } + }); + return null; + } + }); + } + @Test public void testACLs() throws Exception { Configuration conf = new Configuration(); @@ -586,6 +802,9 @@ public void testACLs() throws Exception { conf.set(KMSACLs.Type.ROLLOVER.getAclConfigKey(), KMSACLs.Type.ROLLOVER.toString() + ",SET_KEY_MATERIAL"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "k0.ALL", "*"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "k1.ALL", "*"); + writeConf(testDir, conf); runServer(null, null, testDir, new KMSCallable() { @@ -891,6 +1110,9 @@ public void testKMSBlackList() throws Exception { conf.set(KMSACLs.Type.DECRYPT_EEK.getAclConfigKey(), "client,hdfs,otheradmin"); conf.set(KMSACLs.Type.DECRYPT_EEK.getBlacklistConfigKey(), "hdfs,otheradmin"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "ck0.ALL", "*"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "ck1.ALL", "*"); + writeConf(testDir, conf); runServer(null, null, testDir, new KMSCallable() { @@ -973,6 +1195,7 @@ public void testServicePrincipalACLs() throws Exception { conf.set(type.getAclConfigKey(), " "); } conf.set(KMSACLs.Type.CREATE.getAclConfigKey(), "client"); + conf.set(KMSConfiguration.DEFAULT_KEY_ACL_PREFIX + "MANAGEMENT", "client,client/host"); writeConf(testDir, conf); @@ -1096,6 +1319,9 @@ public void testDelegationTokenAccess() throws Exception { conf.set("hadoop.kms.authentication.kerberos.principal", "HTTP/localhost"); conf.set("hadoop.kms.authentication.kerberos.name.rules", "DEFAULT"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "kA.ALL", "*"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "kD.ALL", "*"); + writeConf(testDir, conf); runServer(null, null, testDir, new KMSCallable() { @@ -1164,6 +1390,10 @@ public void testProxyUser() throws Exception { conf.set("hadoop.kms.authentication.kerberos.name.rules", "DEFAULT"); conf.set("hadoop.kms.proxyuser.client.users", "foo"); conf.set("hadoop.kms.proxyuser.client.hosts", "*"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "kAA.ALL", "*"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "kBB.ALL", "*"); + conf.set(KeyAuthorizationKeyProvider.KEY_ACL + "kCC.ALL", "*"); + writeConf(testDir, conf); runServer(null, null, testDir, new KMSCallable() { diff --git a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKeyAuthorizationKeyProvider.java b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKeyAuthorizationKeyProvider.java new file mode 100644 index 0000000000..a79926a9cd --- /dev/null +++ b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKeyAuthorizationKeyProvider.java @@ -0,0 +1,218 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.crypto.key.kms.server; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.net.URI; +import java.security.PrivilegedExceptionAction; +import java.security.SecureRandom; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.crypto.key.KeyProvider; +import org.apache.hadoop.crypto.key.KeyProvider.KeyVersion; +import org.apache.hadoop.crypto.key.KeyProvider.Options; +import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; +import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; +import org.apache.hadoop.crypto.key.UserProvider; +import org.apache.hadoop.crypto.key.kms.server.KeyAuthorizationKeyProvider.KeyACLs; +import org.apache.hadoop.crypto.key.kms.server.KeyAuthorizationKeyProvider.KeyOpType; +import org.apache.hadoop.security.UserGroupInformation; +import org.junit.Assert; +import org.junit.Test; + +public class TestKeyAuthorizationKeyProvider { + + private static final String CIPHER = "AES"; + + @Test + public void testCreateKey() throws Exception { + final Configuration conf = new Configuration(); + KeyProvider kp = + new UserProvider.Factory().createProvider(new URI("user:///"), conf); + KeyACLs mock = mock(KeyACLs.class); + when(mock.isACLPresent("foo", KeyOpType.MANAGEMENT)).thenReturn(true); + UserGroupInformation u1 = UserGroupInformation.createRemoteUser("u1"); + when(mock.hasAccessToKey("foo", u1, KeyOpType.MANAGEMENT)).thenReturn(true); + final KeyProviderCryptoExtension kpExt = + new KeyAuthorizationKeyProvider( + KeyProviderCryptoExtension.createKeyProviderCryptoExtension(kp), + mock); + + u1.doAs( + new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + try { + kpExt.createKey("foo", SecureRandom.getSeed(16), + newOptions(conf)); + } catch (IOException ioe) { + Assert.fail("User should be Authorized !!"); + } + + // "bar" key not configured + try { + kpExt.createKey("bar", SecureRandom.getSeed(16), + newOptions(conf)); + Assert.fail("User should NOT be Authorized !!"); + } catch (IOException ioe) { + // Ignore + } + return null; + } + } + ); + + // Unauthorized User + UserGroupInformation.createRemoteUser("badGuy").doAs( + new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + try { + kpExt.createKey("foo", SecureRandom.getSeed(16), + newOptions(conf)); + Assert.fail("User should NOT be Authorized !!"); + } catch (IOException ioe) { + // Ignore + } + return null; + } + } + ); + } + + @Test + public void testOpsWhenACLAttributeExists() throws Exception { + final Configuration conf = new Configuration(); + KeyProvider kp = + new UserProvider.Factory().createProvider(new URI("user:///"), 
conf); + KeyACLs mock = mock(KeyACLs.class); + when(mock.isACLPresent("testKey", KeyOpType.MANAGEMENT)).thenReturn(true); + when(mock.isACLPresent("testKey", KeyOpType.GENERATE_EEK)).thenReturn(true); + when(mock.isACLPresent("testKey", KeyOpType.DECRYPT_EEK)).thenReturn(true); + when(mock.isACLPresent("testKey", KeyOpType.ALL)).thenReturn(true); + UserGroupInformation u1 = UserGroupInformation.createRemoteUser("u1"); + UserGroupInformation u2 = UserGroupInformation.createRemoteUser("u2"); + UserGroupInformation u3 = UserGroupInformation.createRemoteUser("u3"); + UserGroupInformation sudo = UserGroupInformation.createRemoteUser("sudo"); + when(mock.hasAccessToKey("testKey", u1, KeyOpType.MANAGEMENT)).thenReturn(true); + when(mock.hasAccessToKey("testKey", u2, KeyOpType.GENERATE_EEK)).thenReturn(true); + when(mock.hasAccessToKey("testKey", u3, KeyOpType.DECRYPT_EEK)).thenReturn(true); + when(mock.hasAccessToKey("testKey", sudo, KeyOpType.ALL)).thenReturn(true); + final KeyProviderCryptoExtension kpExt = + new KeyAuthorizationKeyProvider( + KeyProviderCryptoExtension.createKeyProviderCryptoExtension(kp), + mock); + + final KeyVersion barKv = u1.doAs( + new PrivilegedExceptionAction() { + @Override + public KeyVersion run() throws Exception { + Options opt = newOptions(conf); + Map m = new HashMap(); + m.put("key.acl.name", "testKey"); + opt.setAttributes(m); + try { + KeyVersion kv = + kpExt.createKey("foo", SecureRandom.getSeed(16), opt); + kpExt.rollNewVersion(kv.getName()); + kpExt.rollNewVersion(kv.getName(), SecureRandom.getSeed(16)); + kpExt.deleteKey(kv.getName()); + } catch (IOException ioe) { + Assert.fail("User should be Authorized !!"); + } + + KeyVersion retkv = null; + try { + retkv = kpExt.createKey("bar", SecureRandom.getSeed(16), opt); + kpExt.generateEncryptedKey(retkv.getName()); + Assert.fail("User should NOT be Authorized to generate EEK !!"); + } catch (IOException ioe) { + } + Assert.assertNotNull(retkv); + return retkv; + } + } + ); + + final EncryptedKeyVersion barEKv = + u2.doAs( + new PrivilegedExceptionAction() { + @Override + public EncryptedKeyVersion run() throws Exception { + try { + kpExt.deleteKey(barKv.getName()); + Assert.fail("User should NOT be Authorized to " + + "perform any other operation !!"); + } catch (IOException ioe) { + } + return kpExt.generateEncryptedKey(barKv.getName()); + } + }); + + u3.doAs( + new PrivilegedExceptionAction() { + @Override + public KeyVersion run() throws Exception { + try { + kpExt.deleteKey(barKv.getName()); + Assert.fail("User should NOT be Authorized to " + + "perform any other operation !!"); + } catch (IOException ioe) { + } + return kpExt.decryptEncryptedKey(barEKv); + } + }); + + sudo.doAs( + new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + Options opt = newOptions(conf); + Map m = new HashMap(); + m.put("key.acl.name", "testKey"); + opt.setAttributes(m); + try { + KeyVersion kv = + kpExt.createKey("foo", SecureRandom.getSeed(16), opt); + kpExt.rollNewVersion(kv.getName()); + kpExt.rollNewVersion(kv.getName(), SecureRandom.getSeed(16)); + EncryptedKeyVersion ekv = kpExt.generateEncryptedKey(kv.getName()); + kpExt.decryptEncryptedKey(ekv); + kpExt.deleteKey(kv.getName()); + } catch (IOException ioe) { + Assert.fail("User should be Allowed to do everything !!"); + } + return null; + } + } + ); + } + + private static KeyProvider.Options newOptions(Configuration conf) { + KeyProvider.Options options = new KeyProvider.Options(conf); + options.setCipher(CIPHER); + 
options.setBitLength(128); + return options; + } + +} From 7d38ffc8d3500d428bdad5640e9e70d66ed5ea60 Mon Sep 17 00:00:00 2001 From: XuanGong Date: Wed, 10 Sep 2014 15:14:47 -0700 Subject: [PATCH 09/13] Fix CHANGES.txt. Credit Mayank Bansal for his contributions on YARN-2459 --- hadoop-yarn-project/CHANGES.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index cf5ec65e33..7968071e64 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -312,7 +312,7 @@ Release 2.6.0 - UNRELEASED (Zhihai Xu via kasha) YARN-2459. RM crashes if App gets rejected for any reason - and HA is enabled. (Jian He via xgong) + and HA is enabled. (Jian He and Mayank Bansal via xgong) YARN-2158. Fixed TestRMWebServicesAppsModification#testSingleAppKill test failure. (Varun Vasudev via jianhe) From 7f80e142096ab21b4852635a127589e34a747bc4 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 10 Sep 2014 22:10:36 +0000 Subject: [PATCH 10/13] MAPREDUCE-6075. HistoryServerFileSystemStateStore can create zero-length files. Contributed by Jason Lowe --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../v2/hs/HistoryServerFileSystemStateStoreService.java | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index a5e5920aa7..490c0a1d4d 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -276,6 +276,9 @@ Release 2.6.0 - UNRELEASED MAPREDUCE-6071. JobImpl#makeUberDecision doesn't log that Uber mode is disabled because of too much CPUs (Tsuyoshi OZAWA via jlowe) + MAPREDUCE-6075. HistoryServerFileSystemStateStore can create zero-length + files (jlowe) + Release 2.5.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryServerFileSystemStateStoreService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryServerFileSystemStateStoreService.java index dcea333b5f..9902f5ea8f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryServerFileSystemStateStoreService.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryServerFileSystemStateStoreService.java @@ -189,6 +189,8 @@ public void storeTokenMasterKey(DelegationKey key) throws IOException { DataOutputStream dataStream = new DataOutputStream(memStream); try { key.write(dataStream); + dataStream.close(); + dataStream = null; } finally { IOUtils.cleanup(LOG, dataStream); } @@ -260,6 +262,8 @@ private void writeFile(Path file, byte[] data) throws IOException { try { try { out.write(data); + out.close(); + out = null; } finally { IOUtils.cleanup(LOG, out); } @@ -299,6 +303,8 @@ private byte[] buildTokenData(MRDelegationTokenIdentifier tokenId, try { tokenId.write(dataStream); dataStream.writeLong(renewDate); + dataStream.close(); + dataStream = null; } finally { IOUtils.cleanup(LOG, dataStream); } From 5ec7fcd9dd6bb86858c6e2583321bb9a615bd392 Mon Sep 17 00:00:00 2001 From: Colin Patrick Mccabe Date: Wed, 10 Sep 2014 16:14:08 -0700 Subject: [PATCH 11/13] HADOOP-11074. Move s3-related FS connector code to hadoop-aws. (David S. 
Wang via Colin Patrick McCabe) --- .../services/org.apache.hadoop.fs.FileSystem | 2 - hadoop-project/pom.xml | 12 + .../dev-support/findbugs-exclude.xml | 358 +++++++++++++++++- .../java/org/apache/hadoop/fs/s3/Block.java | 0 .../apache/hadoop/fs/s3/FileSystemStore.java | 0 .../java/org/apache/hadoop/fs/s3/INode.java | 0 .../hadoop/fs/s3/Jets3tFileSystemStore.java | 0 .../apache/hadoop/fs/s3/MigrationTool.java | 0 .../apache/hadoop/fs/s3/S3Credentials.java | 0 .../org/apache/hadoop/fs/s3/S3Exception.java | 0 .../org/apache/hadoop/fs/s3/S3FileSystem.java | 0 .../hadoop/fs/s3/S3FileSystemConfigKeys.java | 0 .../hadoop/fs/s3/S3FileSystemException.java | 0 .../apache/hadoop/fs/s3/S3InputStream.java | 0 .../apache/hadoop/fs/s3/S3OutputStream.java | 0 .../fs/s3/VersionMismatchException.java | 0 .../java/org/apache/hadoop/fs/s3/package.html | 0 .../hadoop/fs/s3native/FileMetadata.java | 0 .../s3native/Jets3tNativeFileSystemStore.java | 0 .../fs/s3native/NativeFileSystemStore.java | 0 .../fs/s3native/NativeS3FileSystem.java | 0 .../hadoop/fs/s3native/PartialListing.java | 0 .../S3NativeFileSystemConfigKeys.java | 0 .../apache/hadoop/fs/s3native/package.html | 0 .../services/org.apache.hadoop.fs.FileSystem | 17 + .../fs/contract/s3n/NativeS3Contract.java | 0 .../contract/s3n/TestS3NContractCreate.java | 0 .../contract/s3n/TestS3NContractDelete.java | 0 .../fs/contract/s3n/TestS3NContractMkdir.java | 0 .../fs/contract/s3n/TestS3NContractOpen.java | 0 .../contract/s3n/TestS3NContractRename.java | 0 .../contract/s3n/TestS3NContractRootDir.java | 0 .../fs/contract/s3n/TestS3NContractSeek.java | 0 .../hadoop/fs/s3/InMemoryFileSystemStore.java | 0 .../fs/s3/Jets3tS3FileSystemContractTest.java | 0 .../fs/s3/S3FileSystemContractBaseTest.java | 9 - .../hadoop/fs/s3/S3InMemoryFileSystem.java | 0 .../org/apache/hadoop/fs/s3/TestINode.java | 0 .../s3/TestInMemoryS3FileSystemContract.java | 0 .../hadoop/fs/s3/TestS3Credentials.java | 0 .../apache/hadoop/fs/s3/TestS3FileSystem.java | 0 .../fs/s3/TestS3InMemoryFileSystem.java | 0 .../InMemoryNativeFileSystemStore.java | 0 .../Jets3tNativeS3FileSystemContractTest.java | 0 .../NativeS3FileSystemContractBaseTest.java | 0 .../fs/s3native/S3NInMemoryFileSystem.java | 0 ...estInMemoryNativeS3FileSystemContract.java | 0 .../TestJets3tNativeFileSystemStore.java | 0 .../s3native/TestS3NInMemoryFileSystem.java | 0 .../src/test/resources/contract/s3n.xml | 0 hadoop-tools/hadoop-tools-dist/pom.xml | 6 + 51 files changed, 392 insertions(+), 12 deletions(-) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3/Block.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3/FileSystemStore.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3/INode.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3/Jets3tFileSystemStore.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3/MigrationTool.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3/S3Credentials.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3/S3Exception.java (100%) rename {hadoop-common-project/hadoop-common => 
hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3/S3FileSystem.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3/S3FileSystemConfigKeys.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3/S3FileSystemException.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3/S3InputStream.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3/S3OutputStream.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3/VersionMismatchException.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3/package.html (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3native/FileMetadata.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3native/PartialListing.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3native/S3NativeFileSystemConfigKeys.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/main/java/org/apache/hadoop/fs/s3native/package.html (100%) create mode 100644 hadoop-tools/hadoop-aws/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/contract/s3n/NativeS3Contract.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractCreate.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractDelete.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractMkdir.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractOpen.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractRename.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractRootDir.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractSeek.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3/InMemoryFileSystemStore.java (100%) rename {hadoop-common-project/hadoop-common => 
hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3/Jets3tS3FileSystemContractTest.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java (83%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3/S3InMemoryFileSystem.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3/TestINode.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3/TestInMemoryS3FileSystemContract.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3/TestS3Credentials.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3/TestS3FileSystem.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3/TestS3InMemoryFileSystem.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3native/InMemoryNativeFileSystemStore.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3native/Jets3tNativeS3FileSystemContractTest.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3native/S3NInMemoryFileSystem.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3native/TestInMemoryNativeS3FileSystemContract.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3native/TestJets3tNativeFileSystemStore.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/java/org/apache/hadoop/fs/s3native/TestS3NInMemoryFileSystem.java (100%) rename {hadoop-common-project/hadoop-common => hadoop-tools/hadoop-aws}/src/test/resources/contract/s3n.xml (100%) diff --git a/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem index 64632e412e..17ffa7fe72 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem +++ b/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem @@ -15,7 +15,5 @@ org.apache.hadoop.fs.LocalFileSystem org.apache.hadoop.fs.viewfs.ViewFileSystem -org.apache.hadoop.fs.s3.S3FileSystem -org.apache.hadoop.fs.s3native.NativeS3FileSystem org.apache.hadoop.fs.ftp.FTPFileSystem org.apache.hadoop.fs.HarFileSystem diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 76448cfcad..ad8422ff5e 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -327,6 +327,12 @@ ${project.version} + + org.apache.hadoop + hadoop-aws + ${project.version} + + com.google.guava guava @@ -576,6 +582,12 @@ com.amazonaws aws-java-sdk 1.7.2 + + + com.fasterxml.jackson.core + jackson-core + + org.apache.mina diff --git 
a/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml b/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml index 74e4923bf7..204e6abeae 100644 --- a/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml +++ b/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml @@ -15,5 +15,361 @@ limitations under the License. --> [~358 added lines of FindBugs exclude XML (and one removed line); the element markup is not preserved in this copy of the patch]
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/Block.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/Block.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/Block.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/Block.java
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/FileSystemStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/FileSystemStore.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/FileSystemStore.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/FileSystemStore.java
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/INode.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/INode.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/INode.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/INode.java
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/Jets3tFileSystemStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/Jets3tFileSystemStore.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/Jets3tFileSystemStore.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/Jets3tFileSystemStore.java
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/MigrationTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/MigrationTool.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/MigrationTool.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/MigrationTool.java
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3Credentials.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/S3Credentials.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3Credentials.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/S3Credentials.java
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3Exception.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/S3Exception.java
similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3Exception.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/S3Exception.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3FileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/S3FileSystem.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3FileSystem.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/S3FileSystem.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3FileSystemConfigKeys.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/S3FileSystemConfigKeys.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3FileSystemConfigKeys.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/S3FileSystemConfigKeys.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3FileSystemException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/S3FileSystemException.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3FileSystemException.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/S3FileSystemException.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3InputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/S3InputStream.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3InputStream.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/S3InputStream.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3OutputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/S3OutputStream.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3OutputStream.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/S3OutputStream.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/VersionMismatchException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/VersionMismatchException.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/VersionMismatchException.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/VersionMismatchException.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/package.html b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/package.html similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/package.html rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3/package.html diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/FileMetadata.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/FileMetadata.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/FileMetadata.java rename to 
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/FileMetadata.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/PartialListing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/PartialListing.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/PartialListing.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/PartialListing.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/S3NativeFileSystemConfigKeys.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3NativeFileSystemConfigKeys.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/S3NativeFileSystemConfigKeys.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3NativeFileSystemConfigKeys.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/package.html b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/package.html similarity index 100% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/package.html rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/package.html diff --git a/hadoop-tools/hadoop-aws/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/hadoop-tools/hadoop-aws/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem new file mode 100644 index 0000000000..3cd1d6b2b8 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.hadoop.fs.s3.S3FileSystem +org.apache.hadoop.fs.s3native.NativeS3FileSystem diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/NativeS3Contract.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/NativeS3Contract.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/NativeS3Contract.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/NativeS3Contract.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractCreate.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractCreate.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractCreate.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractCreate.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractDelete.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractDelete.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractDelete.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractDelete.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractMkdir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractMkdir.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractMkdir.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractMkdir.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractOpen.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractOpen.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractOpen.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractOpen.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractRename.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractRename.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractRename.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractRename.java diff --git 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractRootDir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractRootDir.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractRootDir.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractRootDir.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractSeek.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractSeek.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractSeek.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3n/TestS3NContractSeek.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/InMemoryFileSystemStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/InMemoryFileSystemStore.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/InMemoryFileSystemStore.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/InMemoryFileSystemStore.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/Jets3tS3FileSystemContractTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/Jets3tS3FileSystemContractTest.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/Jets3tS3FileSystemContractTest.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/Jets3tS3FileSystemContractTest.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java similarity index 83% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java index d704b006be..28b0507f0f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java @@ -46,15 +46,6 @@ protected void tearDown() throws Exception { super.tearDown(); } - public void testBlockSize() throws Exception { - Path file = path("/test/hadoop/file"); - long newBlockSize = fs.getDefaultBlockSize(file) * 2; - fs.getConf().setLong("fs.s3.block.size", newBlockSize); - createFile(file); - assertEquals("Double default block size", newBlockSize, - fs.getFileStatus(file).getBlockSize()); - } - public void testCanonicalName() throws Exception { assertNull("s3 doesn't support security token and shouldn't have canonical name", fs.getCanonicalServiceName()); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/S3InMemoryFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/S3InMemoryFileSystem.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/S3InMemoryFileSystem.java rename to 
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/S3InMemoryFileSystem.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/TestINode.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/TestINode.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/TestINode.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/TestINode.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/TestInMemoryS3FileSystemContract.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/TestInMemoryS3FileSystemContract.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/TestInMemoryS3FileSystemContract.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/TestInMemoryS3FileSystemContract.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/TestS3Credentials.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/TestS3Credentials.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/TestS3Credentials.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/TestS3Credentials.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/TestS3FileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/TestS3FileSystem.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/TestS3FileSystem.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/TestS3FileSystem.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/TestS3InMemoryFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/TestS3InMemoryFileSystem.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/TestS3InMemoryFileSystem.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/TestS3InMemoryFileSystem.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/InMemoryNativeFileSystemStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/InMemoryNativeFileSystemStore.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/InMemoryNativeFileSystemStore.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/InMemoryNativeFileSystemStore.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/Jets3tNativeS3FileSystemContractTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/Jets3tNativeS3FileSystemContractTest.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/Jets3tNativeS3FileSystemContractTest.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/Jets3tNativeS3FileSystemContractTest.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java similarity index 100% rename from 
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/S3NInMemoryFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/S3NInMemoryFileSystem.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/S3NInMemoryFileSystem.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/S3NInMemoryFileSystem.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/TestInMemoryNativeS3FileSystemContract.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestInMemoryNativeS3FileSystemContract.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/TestInMemoryNativeS3FileSystemContract.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestInMemoryNativeS3FileSystemContract.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/TestJets3tNativeFileSystemStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestJets3tNativeFileSystemStore.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/TestJets3tNativeFileSystemStore.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestJets3tNativeFileSystemStore.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/TestS3NInMemoryFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestS3NInMemoryFileSystem.java similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/TestS3NInMemoryFileSystem.java rename to hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestS3NInMemoryFileSystem.java diff --git a/hadoop-common-project/hadoop-common/src/test/resources/contract/s3n.xml b/hadoop-tools/hadoop-aws/src/test/resources/contract/s3n.xml similarity index 100% rename from hadoop-common-project/hadoop-common/src/test/resources/contract/s3n.xml rename to hadoop-tools/hadoop-aws/src/test/resources/contract/s3n.xml diff --git a/hadoop-tools/hadoop-tools-dist/pom.xml b/hadoop-tools/hadoop-tools-dist/pom.xml index 38043f7bce..7a01085954 100644 --- a/hadoop-tools/hadoop-tools-dist/pom.xml +++ b/hadoop-tools/hadoop-tools-dist/pom.xml @@ -83,6 +83,12 @@ compile ${project.version} + + org.apache.hadoop + hadoop-aws + compile + ${project.version} + org.apache.hadoop hadoop-azure From 83be3ad44484bf8a24cb90de4b9c26ab59d226a8 Mon Sep 17 00:00:00 2001 From: Jian He Date: Wed, 10 Sep 2014 18:19:53 -0700 Subject: [PATCH 12/13] YARN-415. Capture aggregate memory allocation at the app-level for chargeback. 
Contributed by Eric Payne & Andrey Klochkov --- hadoop-yarn-project/CHANGES.txt | 3 + .../ApplicationResourceUsageReport.java | 41 +- .../src/main/proto/yarn_protos.proto | 2 + .../yarn/client/cli/ApplicationCLI.java | 6 + .../hadoop/yarn/client/cli/TestYarnCLI.java | 8 +- .../ApplicationResourceUsageReportPBImpl.java | 24 ++ .../yarn/server/utils/BuilderUtils.java | 5 +- .../server/resourcemanager/RMServerUtils.java | 2 +- .../recovery/FileSystemRMStateStore.java | 4 +- .../recovery/MemoryRMStateStore.java | 9 +- .../recovery/RMStateStore.java | 24 +- .../recovery/ZKRMStateStore.java | 4 +- .../records/ApplicationAttemptStateData.java | 32 +- .../pb/ApplicationAttemptStateDataPBImpl.java | 24 ++ .../resourcemanager/rmapp/RMAppImpl.java | 17 +- .../resourcemanager/rmapp/RMAppMetrics.java | 15 +- .../attempt/AggregateAppResourceUsage.java | 60 +++ .../rmapp/attempt/RMAppAttemptImpl.java | 16 +- .../rmapp/attempt/RMAppAttemptMetrics.java | 36 +- .../rmcontainer/RMContainerImpl.java | 26 +- .../SchedulerApplicationAttempt.java | 41 +- .../common/fica/FiCaSchedulerApp.java | 3 + .../scheduler/fair/FSAppAttempt.java | 3 + .../resourcemanager/webapp/AppBlock.java | 5 +- .../resourcemanager/webapp/dao/AppInfo.java | 12 + ...yarn_server_resourcemanager_recovery.proto | 4 +- .../resourcemanager/TestClientRMService.java | 60 ++- .../TestContainerResourceUsage.java | 401 ++++++++++++++++++ .../applicationsmanager/MockAsm.java | 19 +- .../recovery/RMStateStoreTestBase.java | 18 +- .../rmapp/TestRMAppTransitions.java | 16 +- .../attempt/TestRMAppAttemptTransitions.java | 72 +++- .../rmcontainer/TestRMContainerImpl.java | 11 + .../scheduler/capacity/TestLeafQueue.java | 89 ++-- .../scheduler/fair/FairSchedulerTestBase.java | 4 +- .../webapp/TestRMWebAppFairScheduler.java | 2 +- .../webapp/TestRMWebServicesApps.java | 2 +- .../src/site/apt/ResourceManagerRest.apt.vm | 22 +- 38 files changed, 1045 insertions(+), 97 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 7968071e64..b28f8f823f 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -67,6 +67,9 @@ Release 2.6.0 - UNRELEASED YARN-2394. FairScheduler: Configure fairSharePreemptionThreshold per queue. (Wei Yan via kasha) + YARN-415. Capture aggregate memory allocation at the app-level for chargeback. + (Eric Payne & Andrey Klochkov via jianhe) + IMPROVEMENTS YARN-2197. 
Add a link to YARN CHANGES.txt in the left side of doc diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java index 6e9c76fb01..b20d8322d2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java @@ -35,7 +35,8 @@ public abstract class ApplicationResourceUsageReport { @Unstable public static ApplicationResourceUsageReport newInstance( int numUsedContainers, int numReservedContainers, Resource usedResources, - Resource reservedResources, Resource neededResources) { + Resource reservedResources, Resource neededResources, long memorySeconds, + long vcoreSeconds) { ApplicationResourceUsageReport report = Records.newRecord(ApplicationResourceUsageReport.class); report.setNumUsedContainers(numUsedContainers); @@ -43,6 +44,8 @@ public static ApplicationResourceUsageReport newInstance( report.setUsedResources(usedResources); report.setReservedResources(reservedResources); report.setNeededResources(neededResources); + report.setMemorySeconds(memorySeconds); + report.setVcoreSeconds(vcoreSeconds); return report; } @@ -113,4 +116,40 @@ public static ApplicationResourceUsageReport newInstance( @Private @Unstable public abstract void setNeededResources(Resource needed_resources); + + /** + * Set the aggregated amount of memory (in megabytes) the application has + * allocated times the number of seconds the application has been running. + * @param memory_seconds the aggregated amount of memory seconds + */ + @Private + @Unstable + public abstract void setMemorySeconds(long memory_seconds); + + /** + * Get the aggregated amount of memory (in megabytes) the application has + * allocated times the number of seconds the application has been running. + * @return the aggregated amount of memory seconds + */ + @Public + @Unstable + public abstract long getMemorySeconds(); + + /** + * Set the aggregated number of vcores that the application has allocated + * times the number of seconds the application has been running. + * @param vcore_seconds the aggregated number of vcore seconds + */ + @Private + @Unstable + public abstract void setVcoreSeconds(long vcore_seconds); + + /** + * Get the aggregated number of vcores that the application has allocated + * times the number of seconds the application has been running. 
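+ * For example, one container that holds a single vcore for 60 seconds adds
+ * 60 vcore-seconds to this aggregate, just as a 2048 MB container held for
+ * the same 60 seconds adds 122880 MB-seconds to the memory aggregate.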
+ * @return the aggregated number of vcore seconds + */ + @Public + @Unstable + public abstract long getVcoreSeconds(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index 3f1fa6ce0b..7e7f21b6a4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -167,6 +167,8 @@ message ApplicationResourceUsageReportProto { optional ResourceProto used_resources = 3; optional ResourceProto reserved_resources = 4; optional ResourceProto needed_resources = 5; + optional int64 memory_seconds = 6; + optional int64 vcore_seconds = 7; } message ApplicationReportProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java index 7d61a23021..54cfe91eaa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.ContainerReport; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; @@ -460,6 +461,11 @@ private void printApplicationReport(String applicationId) appReportStr.println(appReport.getRpcPort()); appReportStr.print("\tAM Host : "); appReportStr.println(appReport.getHost()); + appReportStr.print("\tAggregate Resource Allocation : "); + + ApplicationResourceUsageReport usageReport = appReport.getApplicationResourceUsageReport(); + appReportStr.print(usageReport.getMemorySeconds() + " MB-seconds, "); + appReportStr.println(usageReport.getVcoreSeconds() + " vcore-seconds"); appReportStr.print("\tDiagnostics : "); appReportStr.print(appReport.getDiagnostics()); } else { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java index b408b61ce4..47fa5ec6d0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java @@ -46,6 +46,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerReport; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -87,11 +88,15 @@ public void setup() { public void testGetApplicationReport() throws Exception { ApplicationCLI cli = createAndGetAppCLI(); 
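// The mocked usage report below carries the new aggregates (123456 MB-seconds,
// 4567 vcore-seconds), which the CLI is expected to surface on the
// "Aggregate Resource Allocation" line asserted further down.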
ApplicationId applicationId = ApplicationId.newInstance(1234, 5); + ApplicationResourceUsageReport usageReport = + ApplicationResourceUsageReport.newInstance( + 2, 0, null, null, null, 123456, 4567); ApplicationReport newApplicationReport = ApplicationReport.newInstance( applicationId, ApplicationAttemptId.newInstance(applicationId, 1), "user", "queue", "appname", "host", 124, null, YarnApplicationState.FINISHED, "diagnostics", "url", 0, 0, - FinalApplicationStatus.SUCCEEDED, null, "N/A", 0.53789f, "YARN", null); + FinalApplicationStatus.SUCCEEDED, usageReport, "N/A", 0.53789f, "YARN", + null); when(client.getApplicationReport(any(ApplicationId.class))).thenReturn( newApplicationReport); int result = cli.run(new String[] { "application", "-status", applicationId.toString() }); @@ -113,6 +118,7 @@ public void testGetApplicationReport() throws Exception { pw.println("\tTracking-URL : N/A"); pw.println("\tRPC Port : 124"); pw.println("\tAM Host : host"); + pw.println("\tAggregate Resource Allocation : 123456 MB-seconds, 4567 vcore-seconds"); pw.println("\tDiagnostics : diagnostics"); pw.close(); String appReportStr = baos.toString("UTF-8"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java index ada716593e..1cabaddcdc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationResourceUsageReportPBImpl.java @@ -200,6 +200,30 @@ public synchronized void setNeededResources(Resource reserved_resources) { this.neededResources = reserved_resources; } + @Override + public synchronized void setMemorySeconds(long memory_seconds) { + maybeInitBuilder(); + builder.setMemorySeconds(memory_seconds); + } + + @Override + public synchronized long getMemorySeconds() { + ApplicationResourceUsageReportProtoOrBuilder p = viaProto ? proto : builder; + return p.getMemorySeconds(); + } + + @Override + public synchronized void setVcoreSeconds(long vcore_seconds) { + maybeInitBuilder(); + builder.setVcoreSeconds(vcore_seconds); + } + + @Override + public synchronized long getVcoreSeconds() { + ApplicationResourceUsageReportProtoOrBuilder p = viaProto ? 
proto : builder; + return (p.getVcoreSeconds()); + } + private ResourcePBImpl convertFromProtoFormat(ResourceProto p) { return new ResourcePBImpl(p); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java index 64eb428668..0cfd911f83 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java @@ -370,7 +370,8 @@ public static ApplicationSubmissionContext newApplicationSubmissionContext( public static ApplicationResourceUsageReport newApplicationResourceUsageReport( int numUsedContainers, int numReservedContainers, Resource usedResources, - Resource reservedResources, Resource neededResources) { + Resource reservedResources, Resource neededResources, long memorySeconds, + long vcoreSeconds) { ApplicationResourceUsageReport report = recordFactory.newRecordInstance(ApplicationResourceUsageReport.class); report.setNumUsedContainers(numUsedContainers); @@ -378,6 +379,8 @@ public static ApplicationResourceUsageReport newApplicationResourceUsageReport( report.setUsedResources(usedResources); report.setReservedResources(reservedResources); report.setNeededResources(neededResources); + report.setMemorySeconds(memorySeconds); + report.setVcoreSeconds(vcoreSeconds); return report; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java index d93c45d0d7..29c5953a2a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java @@ -236,5 +236,5 @@ public static YarnApplicationAttemptState createApplicationAttemptState( DUMMY_APPLICATION_RESOURCE_USAGE_REPORT = BuilderUtils.newApplicationResourceUsageReport(-1, -1, Resources.createResource(-1, -1), Resources.createResource(-1, -1), - Resources.createResource(-1, -1)); + Resources.createResource(-1, -1), 0, 0); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java index 0a3b269c97..4c01a618c8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java @@ 
-280,7 +280,9 @@ private void loadRMAppState(RMState rmState) throws Exception { attemptStateData.getFinalTrackingUrl(), attemptStateData.getDiagnostics(), attemptStateData.getFinalApplicationStatus(), - attemptStateData.getAMContainerExitStatus()); + attemptStateData.getAMContainerExitStatus(), + attemptStateData.getMemorySeconds(), + attemptStateData.getVcoreSeconds()); // assert child node name is same as application attempt id assert attemptId.equals(attemptState.getAttemptId()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java index f56517cd82..efaa039b94 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java @@ -138,7 +138,10 @@ public synchronized void storeApplicationAttemptStateInternal( ApplicationAttemptState attemptState = new ApplicationAttemptState(appAttemptId, attemptStateData.getMasterContainer(), credentials, - attemptStateData.getStartTime()); + attemptStateData.getStartTime(), + attemptStateData.getMemorySeconds(), + attemptStateData.getVcoreSeconds()); + ApplicationState appState = state.getApplicationState().get( attemptState.getAttemptId().getApplicationId()); @@ -167,7 +170,9 @@ public synchronized void updateApplicationAttemptStateInternal( attemptStateData.getFinalTrackingUrl(), attemptStateData.getDiagnostics(), attemptStateData.getFinalApplicationStatus(), - attemptStateData.getAMContainerExitStatus()); + attemptStateData.getAMContainerExitStatus(), + attemptStateData.getMemorySeconds(), + attemptStateData.getVcoreSeconds()); ApplicationState appState = state.getApplicationState().get( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java index 714a108ece..3074d337a5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java @@ -55,6 +55,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; @@ -265,19 +266,21 @@ 
public static class ApplicationAttemptState { String diagnostics; int exitStatus = ContainerExitStatus.INVALID; FinalApplicationStatus amUnregisteredFinalStatus; + long memorySeconds; + long vcoreSeconds; public ApplicationAttemptState(ApplicationAttemptId attemptId, Container masterContainer, Credentials appAttemptCredentials, - long startTime) { + long startTime, long memorySeconds, long vcoreSeconds) { this(attemptId, masterContainer, appAttemptCredentials, startTime, null, - null, "", null, ContainerExitStatus.INVALID); + null, "", null, ContainerExitStatus.INVALID, memorySeconds, vcoreSeconds); } public ApplicationAttemptState(ApplicationAttemptId attemptId, Container masterContainer, Credentials appAttemptCredentials, long startTime, RMAppAttemptState state, String finalTrackingUrl, String diagnostics, FinalApplicationStatus amUnregisteredFinalStatus, - int exitStatus) { + int exitStatus, long memorySeconds, long vcoreSeconds) { this.attemptId = attemptId; this.masterContainer = masterContainer; this.appAttemptCredentials = appAttemptCredentials; @@ -287,6 +290,8 @@ public ApplicationAttemptState(ApplicationAttemptId attemptId, this.diagnostics = diagnostics == null ? "" : diagnostics; this.amUnregisteredFinalStatus = amUnregisteredFinalStatus; this.exitStatus = exitStatus; + this.memorySeconds = memorySeconds; + this.vcoreSeconds = vcoreSeconds; } public Container getMasterContainer() { @@ -316,6 +321,12 @@ public FinalApplicationStatus getFinalApplicationStatus() { public int getAMContainerExitStatus(){ return this.exitStatus; } + public long getMemorySeconds() { + return memorySeconds; + } + public long getVcoreSeconds() { + return vcoreSeconds; + } } /** @@ -587,10 +598,13 @@ protected abstract void updateApplicationStateInternal(ApplicationId appId, public synchronized void storeNewApplicationAttempt(RMAppAttempt appAttempt) { Credentials credentials = getCredentialsFromAppAttempt(appAttempt); + AggregateAppResourceUsage resUsage = + appAttempt.getRMAppAttemptMetrics().getAggregateAppResourceUsage(); ApplicationAttemptState attemptState = new ApplicationAttemptState(appAttempt.getAppAttemptId(), appAttempt.getMasterContainer(), credentials, - appAttempt.getStartTime()); + appAttempt.getStartTime(), resUsage.getMemorySeconds(), + resUsage.getVcoreSeconds()); dispatcher.getEventHandler().handle( new RMStateStoreAppAttemptEvent(attemptState)); @@ -746,7 +760,7 @@ public synchronized void removeApplication(RMApp app) { ApplicationAttemptState attemptState = new ApplicationAttemptState(appAttempt.getAppAttemptId(), appAttempt.getMasterContainer(), credentials, - appAttempt.getStartTime()); + appAttempt.getStartTime(), 0, 0); appState.attempts.put(attemptState.getAttemptId(), attemptState); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java index 1b1ec7629b..25f3819087 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java @@ -603,7 +603,9 @@ private void 
loadApplicationAttemptState(ApplicationState appState, attemptStateData.getFinalTrackingUrl(), attemptStateData.getDiagnostics(), attemptStateData.getFinalApplicationStatus(), - attemptStateData.getAMContainerExitStatus()); + attemptStateData.getAMContainerExitStatus(), + attemptStateData.getMemorySeconds(), + attemptStateData.getVcoreSeconds()); appState.attempts.put(attemptState.getAttemptId(), attemptState); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java index 5cb9787fac..ad8cdae438 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/ApplicationAttemptStateData.java @@ -43,7 +43,8 @@ public static ApplicationAttemptStateData newInstance( ApplicationAttemptId attemptId, Container container, ByteBuffer attemptTokens, long startTime, RMAppAttemptState finalState, String finalTrackingUrl, String diagnostics, - FinalApplicationStatus amUnregisteredFinalStatus, int exitStatus) { + FinalApplicationStatus amUnregisteredFinalStatus, int exitStatus, + long memorySeconds, long vcoreSeconds) { ApplicationAttemptStateData attemptStateData = Records.newRecord(ApplicationAttemptStateData.class); attemptStateData.setAttemptId(attemptId); @@ -55,6 +56,8 @@ public static ApplicationAttemptStateData newInstance( attemptStateData.setStartTime(startTime); attemptStateData.setFinalApplicationStatus(amUnregisteredFinalStatus); attemptStateData.setAMContainerExitStatus(exitStatus); + attemptStateData.setMemorySeconds(memorySeconds); + attemptStateData.setVcoreSeconds(vcoreSeconds); return attemptStateData; } @@ -72,7 +75,8 @@ public static ApplicationAttemptStateData newInstance( attemptState.getStartTime(), attemptState.getState(), attemptState.getFinalTrackingUrl(), attemptState.getDiagnostics(), attemptState.getFinalApplicationStatus(), - attemptState.getAMContainerExitStatus()); + attemptState.getAMContainerExitStatus(), + attemptState.getMemorySeconds(), attemptState.getVcoreSeconds()); } public abstract ApplicationAttemptStateDataProto getProto(); @@ -157,4 +161,28 @@ public abstract void setFinalApplicationStatus( public abstract int getAMContainerExitStatus(); public abstract void setAMContainerExitStatus(int exitStatus); + + /** + * Get the memory seconds (in MB seconds) of the application. + * @return memory seconds (in MB seconds) of the application + */ + @Public + @Unstable + public abstract long getMemorySeconds(); + + @Public + @Unstable + public abstract void setMemorySeconds(long memorySeconds); + + /** + * Get the vcore seconds of the application. 
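+ * Together with the memory seconds above, this value is written out with the
+ * attempt state so that usage accumulated before an RM restart can be
+ * restored when the attempt is recovered.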
+ * @return vcore seconds of the application + */ + @Public + @Unstable + public abstract long getVcoreSeconds(); + + @Public + @Unstable + public abstract void setVcoreSeconds(long vcoreSeconds); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java index 5c62d634c3..4d6212d136 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/records/impl/pb/ApplicationAttemptStateDataPBImpl.java @@ -228,6 +228,30 @@ public void setStartTime(long startTime) { builder.setStartTime(startTime); } + @Override + public long getMemorySeconds() { + ApplicationAttemptStateDataProtoOrBuilder p = viaProto ? proto : builder; + return p.getMemorySeconds(); + } + + @Override + public long getVcoreSeconds() { + ApplicationAttemptStateDataProtoOrBuilder p = viaProto ? proto : builder; + return p.getVcoreSeconds(); + } + + @Override + public void setMemorySeconds(long memorySeconds) { + maybeInitBuilder(); + builder.setMemorySeconds(memorySeconds); + } + + @Override + public void setVcoreSeconds(long vcoreSeconds) { + maybeInitBuilder(); + builder.setVcoreSeconds(vcoreSeconds); + } + @Override public FinalApplicationStatus getFinalApplicationStatus() { ApplicationAttemptStateDataProtoOrBuilder p = viaProto ? 
proto : builder; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 0b81f96af8..5b6df00eb6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -62,6 +62,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppNodeUpdateEvent.RMAppNodeUpdateType; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; @@ -561,6 +562,10 @@ public ApplicationReport createAndGetApplicationReport(String clientUserName, } } } + + RMAppMetrics rmAppMetrics = getRMAppMetrics(); + appUsageReport.setMemorySeconds(rmAppMetrics.getMemorySeconds()); + appUsageReport.setVcoreSeconds(rmAppMetrics.getVcoreSeconds()); } if (currentApplicationAttemptId == null) { @@ -1117,7 +1122,6 @@ public AttemptFailedTransition(RMAppState initialState) { @Override public RMAppState transition(RMAppImpl app, RMAppEvent event) { - if (!app.submissionContext.getUnmanagedAM() && app.getNumFailedAppAttempts() < app.maxAppAttempts) { boolean transferStateFromPreviousAttempt = false; @@ -1199,6 +1203,8 @@ public RMAppMetrics getRMAppMetrics() { Resource resourcePreempted = Resource.newInstance(0, 0); int numAMContainerPreempted = 0; int numNonAMContainerPreempted = 0; + long memorySeconds = 0; + long vcoreSeconds = 0; for (RMAppAttempt attempt : attempts.values()) { if (null != attempt) { RMAppAttemptMetrics attemptMetrics = @@ -1208,10 +1214,17 @@ public RMAppMetrics getRMAppMetrics() { numAMContainerPreempted += attemptMetrics.getIsPreempted() ? 1 : 0; numNonAMContainerPreempted += attemptMetrics.getNumNonAMContainersPreempted(); + // getAggregateAppResourceUsage() will calculate resource usage stats + // for both running and finished containers. 
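+ // Summing the per-attempt aggregates below produces the app-level
+ // chargeback totals; for example, two attempts reporting 1000 and 500
+ // MB-seconds give the application 1500 MB-seconds overall.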
+ AggregateAppResourceUsage resUsage = + attempt.getRMAppAttemptMetrics().getAggregateAppResourceUsage(); + memorySeconds += resUsage.getMemorySeconds(); + vcoreSeconds += resUsage.getVcoreSeconds(); } } return new RMAppMetrics(resourcePreempted, - numNonAMContainerPreempted, numAMContainerPreempted); + numNonAMContainerPreempted, numAMContainerPreempted, + memorySeconds, vcoreSeconds); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java index 645db1631e..50914705ca 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppMetrics.java @@ -24,12 +24,17 @@ public class RMAppMetrics { final Resource resourcePreempted; final int numNonAMContainersPreempted; final int numAMContainersPreempted; + final long memorySeconds; + final long vcoreSeconds; public RMAppMetrics(Resource resourcePreempted, - int numNonAMContainersPreempted, int numAMContainersPreempted) { + int numNonAMContainersPreempted, int numAMContainersPreempted, + long memorySeconds, long vcoreSeconds) { this.resourcePreempted = resourcePreempted; this.numNonAMContainersPreempted = numNonAMContainersPreempted; this.numAMContainersPreempted = numAMContainersPreempted; + this.memorySeconds = memorySeconds; + this.vcoreSeconds = vcoreSeconds; } public Resource getResourcePreempted() { @@ -43,4 +48,12 @@ public int getNumNonAMContainersPreempted() { public int getNumAMContainersPreempted() { return numAMContainersPreempted; } + + public long getMemorySeconds() { + return memorySeconds; + } + + public long getVcoreSeconds() { + return vcoreSeconds; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java new file mode 100644 index 0000000000..f0c2b348c3 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/AggregateAppResourceUsage.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt; + +import org.apache.hadoop.classification.InterfaceAudience.Private; + +@Private +public class AggregateAppResourceUsage { + long memorySeconds; + long vcoreSeconds; + + public AggregateAppResourceUsage(long memorySeconds, long vcoreSeconds) { + this.memorySeconds = memorySeconds; + this.vcoreSeconds = vcoreSeconds; + } + + /** + * @return the memorySeconds + */ + public long getMemorySeconds() { + return memorySeconds; + } + + /** + * @param memorySeconds the memorySeconds to set + */ + public void setMemorySeconds(long memorySeconds) { + this.memorySeconds = memorySeconds; + } + + /** + * @return the vcoreSeconds + */ + public long getVcoreSeconds() { + return vcoreSeconds; + } + + /** + * @param vcoreSeconds the vcoreSeconds to set + */ + public void setVcoreSeconds(long vcoreSeconds) { + this.vcoreSeconds = vcoreSeconds; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 19fc8004a5..93db340e4a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -85,6 +85,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent; @@ -430,7 +431,8 @@ public RMAppAttemptImpl(ApplicationAttemptId appAttemptId, this.proxiedTrackingUrl = generateProxyUriWithScheme(null); this.maybeLastAttempt = maybeLastAttempt; this.stateMachine = stateMachineFactory.make(this); - this.attemptMetrics = new RMAppAttemptMetrics(applicationAttemptId); + this.attemptMetrics = + new RMAppAttemptMetrics(applicationAttemptId, rmContext); } @Override @@ -704,6 +706,10 @@ public ApplicationResourceUsageReport getApplicationResourceUsageReport() { if (report == null) { report = RMServerUtils.DUMMY_APPLICATION_RESOURCE_USAGE_REPORT; } + AggregateAppResourceUsage resUsage = + this.attemptMetrics.getAggregateAppResourceUsage(); + report.setMemorySeconds(resUsage.getMemorySeconds()); + report.setVcoreSeconds(resUsage.getVcoreSeconds()); return report; } finally { this.readLock.unlock(); @@ -733,6 +739,8 @@ public void recover(RMState state) throws Exception { this.proxiedTrackingUrl = generateProxyUriWithScheme(originalTrackingUrl); this.finalStatus = attemptState.getFinalApplicationStatus(); this.startTime = 
attemptState.getStartTime(); + this.attemptMetrics.updateAggregateAppResourceUsage( + attemptState.getMemorySeconds(),attemptState.getVcoreSeconds()); } public void transferStateFromPreviousAttempt(RMAppAttempt attempt) { @@ -1017,12 +1025,14 @@ private void rememberTargetTransitionsAndStoreState(RMAppAttemptEvent event, default: break; } - + AggregateAppResourceUsage resUsage = + this.attemptMetrics.getAggregateAppResourceUsage(); RMStateStore rmStore = rmContext.getStateStore(); ApplicationAttemptState attemptState = new ApplicationAttemptState(applicationAttemptId, getMasterContainer(), rmStore.getCredentialsFromAppAttempt(this), startTime, - stateToBeStored, finalTrackingUrl, diags, finalStatus, exitStatus); + stateToBeStored, finalTrackingUrl, diags, finalStatus, exitStatus, + resUsage.getMemorySeconds(), resUsage.getVcoreSeconds()); LOG.info("Updating application attempt " + applicationAttemptId + " with final state: " + targetedFinalState + ", and exit status: " + exitStatus); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java index 96b41c3aa7..0e60fd5abb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java @@ -20,6 +20,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock; import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; @@ -27,7 +28,9 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.util.resource.Resources; @@ -42,12 +45,17 @@ public class RMAppAttemptMetrics { private ReadLock readLock; private WriteLock writeLock; - - public RMAppAttemptMetrics(ApplicationAttemptId attemptId) { + private AtomicLong finishedMemorySeconds = new AtomicLong(0); + private AtomicLong finishedVcoreSeconds = new AtomicLong(0); + private RMContext rmContext; + + public RMAppAttemptMetrics(ApplicationAttemptId attemptId, + RMContext rmContext) { this.attemptId = attemptId; ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); this.readLock = lock.readLock(); this.writeLock = lock.writeLock(); + this.rmContext = rmContext; } public void updatePreemptionInfo(Resource resource, RMContainer container) { @@ -94,4 +102,28 @@ public void setIsPreempted() { public boolean getIsPreempted() { return this.isPreempted.get(); } + + public AggregateAppResourceUsage getAggregateAppResourceUsage() { + long memorySeconds = 
finishedMemorySeconds.get(); + long vcoreSeconds = finishedVcoreSeconds.get(); + + // Only add in the running containers if this is the active attempt. + RMAppAttempt currentAttempt = rmContext.getRMApps() + .get(attemptId.getApplicationId()).getCurrentAppAttempt(); + if (currentAttempt.getAppAttemptId().equals(attemptId)) { + ApplicationResourceUsageReport appResUsageReport = rmContext + .getScheduler().getAppResourceUsageReport(attemptId); + if (appResUsageReport != null) { + memorySeconds += appResUsageReport.getMemorySeconds(); + vcoreSeconds += appResUsageReport.getVcoreSeconds(); + } + } + return new AggregateAppResourceUsage(memorySeconds, vcoreSeconds); + } + + public void updateAggregateAppResourceUsage(long finishedMemorySeconds, + long finishedVcoreSeconds) { + this.finishedMemorySeconds.addAndGet(finishedMemorySeconds); + this.finishedVcoreSeconds.addAndGet(finishedVcoreSeconds); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java index eef361f343..e7bb98e7dd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java @@ -24,6 +24,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock; import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; +import org.apache.commons.lang.time.DateUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -40,6 +41,7 @@ import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRunningOnNodeEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAllocatedEvent; @@ -488,7 +490,7 @@ public void transition(RMContainerImpl container, RMContainerEvent event) { // Inform AppAttempt // container.getContainer() can return null when a RMContainer is a // reserved container - updateMetricsIfPreempted(container); + updateAttemptMetrics(container); container.eventHandler.handle(new RMAppAttemptContainerFinishedEvent( container.appAttemptId, finishedEvent.getRemoteContainerStatus())); @@ -497,19 +499,27 @@ public void transition(RMContainerImpl container, RMContainerEvent event) { container); } - private static void updateMetricsIfPreempted(RMContainerImpl container) { + private static void updateAttemptMetrics(RMContainerImpl container) { // If this is a preempted container, update preemption metrics + Resource resource = container.getContainer().getResource(); + RMAppAttempt rmAttempt = container.rmContext.getRMApps() + .get(container.getApplicationAttemptId().getApplicationId()) + .getCurrentAppAttempt(); if 
(ContainerExitStatus.PREEMPTED == container.finishedStatus .getExitStatus()) { - - Resource resource = container.getContainer().getResource(); - RMAppAttempt rmAttempt = - container.rmContext.getRMApps() - .get(container.getApplicationAttemptId().getApplicationId()) - .getCurrentAppAttempt(); rmAttempt.getRMAppAttemptMetrics().updatePreemptionInfo(resource, container); } + + if (rmAttempt != null) { + long usedMillis = container.finishTime - container.creationTime; + long memorySeconds = resource.getMemory() + * usedMillis / DateUtils.MILLIS_PER_SECOND; + long vcoreSeconds = resource.getVirtualCores() + * usedMillis / DateUtils.MILLIS_PER_SECOND; + rmAttempt.getRMAppAttemptMetrics() + .updateAggregateAppResourceUsage(memorySeconds,vcoreSeconds); + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java index 933f456c60..7032e3c378 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java @@ -26,6 +26,7 @@ import java.util.Map; import java.util.Set; +import org.apache.commons.lang.time.DateUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -43,6 +44,7 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent; @@ -69,6 +71,11 @@ public class SchedulerApplicationAttempt { private static final Log LOG = LogFactory .getLog(SchedulerApplicationAttempt.class); + private static final long MEM_AGGREGATE_ALLOCATION_CACHE_MSECS = 3000; + protected long lastMemoryAggregateAllocationUpdateTime = 0; + private long lastMemorySeconds = 0; + private long lastVcoreSeconds = 0; + protected final AppSchedulingInfo appSchedulingInfo; protected Map liveContainers = @@ -505,12 +512,38 @@ public synchronized void resetSchedulingOpportunities(Priority priority, lastScheduledContainer.put(priority, currentTimeMs); schedulingOpportunities.setCount(priority, 0); } - + + synchronized AggregateAppResourceUsage getRunningAggregateAppResourceUsage() { + long currentTimeMillis = System.currentTimeMillis(); + // Don't walk the whole container list if the resources were computed + // recently. 
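+    // The computed totals are cached in lastMemorySeconds/lastVcoreSeconds and
+    // reused for up to MEM_AGGREGATE_ALLOCATION_CACHE_MSECS (3000 ms) to avoid
+    // rescanning liveContainers on every report request.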
+ if ((currentTimeMillis - lastMemoryAggregateAllocationUpdateTime) + > MEM_AGGREGATE_ALLOCATION_CACHE_MSECS) { + long memorySeconds = 0; + long vcoreSeconds = 0; + for (RMContainer rmContainer : this.liveContainers.values()) { + long usedMillis = currentTimeMillis - rmContainer.getCreationTime(); + Resource resource = rmContainer.getContainer().getResource(); + memorySeconds += resource.getMemory() * usedMillis / + DateUtils.MILLIS_PER_SECOND; + vcoreSeconds += resource.getVirtualCores() * usedMillis + / DateUtils.MILLIS_PER_SECOND; + } + + lastMemoryAggregateAllocationUpdateTime = currentTimeMillis; + lastMemorySeconds = memorySeconds; + lastVcoreSeconds = vcoreSeconds; + } + return new AggregateAppResourceUsage(lastMemorySeconds, lastVcoreSeconds); + } + public synchronized ApplicationResourceUsageReport getResourceUsageReport() { + AggregateAppResourceUsage resUsage = getRunningAggregateAppResourceUsage(); return ApplicationResourceUsageReport.newInstance(liveContainers.size(), - reservedContainers.size(), Resources.clone(currentConsumption), - Resources.clone(currentReservation), - Resources.add(currentConsumption, currentReservation)); + reservedContainers.size(), Resources.clone(currentConsumption), + Resources.clone(currentReservation), + Resources.add(currentConsumption, currentReservation), + resUsage.getMemorySeconds(), resUsage.getVcoreSeconds()); } public synchronized Map getLiveContainersMap() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index 846d1e1396..167dcd80e0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java @@ -106,6 +106,9 @@ synchronized public boolean containerCompleted(RMContainer rmContainer, queue.getMetrics().releaseResources(getUser(), 1, containerResource); Resources.subtractFrom(currentConsumption, containerResource); + // Clear resource utilization metrics cache. 
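+    // A timestamp of -1 forces the next getRunningAggregateAppResourceUsage()
+    // call to recompute the totals instead of returning the cached values.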
+ lastMemoryAggregateAllocationUpdateTime = -1; + return true; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java index bf543768f8..825c3985c7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java @@ -146,6 +146,9 @@ synchronized public void containerCompleted(RMContainer rmContainer, // remove from preemption map if it is completed preemptionMap.remove(rmContainer); + + // Clear resource utilization metrics cache. + lastMemoryAggregateAllocationUpdateTime = -1; } private synchronized void unreserveInternal( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppBlock.java index b9f2dda884..c427ccfc94 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppBlock.java @@ -146,7 +146,10 @@ protected void render(Block html) { attemptMetrics.getResourcePreempted()) ._("Number of Non-AM Containers Preempted from Current Attempt:", String.valueOf(attemptMetrics - .getNumNonAMContainersPreempted())); + .getNumNonAMContainersPreempted())) + ._("Aggregate Resource Allocation:", + String.format("%d MB-seconds, %d vcore-seconds", + appMerics.getMemorySeconds(), appMerics.getVcoreSeconds())); pdiv._(); Collection attempts = rmApp.getAppAttempts().values(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java index c7354cedab..2b0dedc51c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppInfo.java @@ -79,6 +79,8 @@ public class AppInfo { protected int allocatedMB; protected int allocatedVCores; protected int runningContainers; + protected long memorySeconds; + protected long vcoreSeconds; // preemption info fields protected int preemptedResourceMB; @@ -165,6 +167,8 @@ public AppInfo(RMApp app, Boolean hasAccess, String schemePrefix) { appMetrics.getNumNonAMContainersPreempted(); preemptedResourceVCores = 
appMetrics.getResourcePreempted().getVirtualCores(); + memorySeconds = appMetrics.getMemorySeconds(); + vcoreSeconds = appMetrics.getVcoreSeconds(); } } @@ -287,4 +291,12 @@ public int getNumNonAMContainersPreempted() { public int getNumAMContainersPreempted() { return numAMContainerPreempted; } + + public long getMemorySeconds() { + return memorySeconds; + } + + public long getVcoreSeconds() { + return vcoreSeconds; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto index eab6af1578..5125a27f68 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/proto/yarn_server_resourcemanager_recovery.proto @@ -78,6 +78,8 @@ message ApplicationAttemptStateDataProto { optional int64 start_time = 7; optional FinalApplicationStatusProto final_application_status = 8; optional int32 am_container_exit_status = 9 [default = -1000]; + optional int64 memory_seconds = 10; + optional int64 vcore_seconds = 11; } message EpochProto { @@ -87,4 +89,4 @@ message EpochProto { message AMRMTokenSecretManagerStateProto { optional MasterKeyProto current_master_key = 1; optional MasterKeyProto next_master_key = 2; -} \ No newline at end of file +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java index 4f4da37da1..571c96f600 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java @@ -60,6 +60,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationAttemptsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; @@ -79,6 +80,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; +import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -223,7 +225,7 @@ protected ClientRMService createClientRMService() { } @Test - public void testGetApplicationReport() throws YarnException { + public void 
testNonExistingApplicationReport() throws YarnException { RMContext rmContext = mock(RMContext.class); when(rmContext.getRMApps()).thenReturn( new ConcurrentHashMap()); @@ -242,6 +244,38 @@ public void testGetApplicationReport() throws YarnException { + "' doesn't exist in RM."); } } + + @Test + public void testGetApplicationReport() throws Exception { + YarnScheduler yarnScheduler = mock(YarnScheduler.class); + RMContext rmContext = mock(RMContext.class); + mockRMContext(yarnScheduler, rmContext); + + ApplicationId appId1 = getApplicationId(1); + + ApplicationACLsManager mockAclsManager = mock(ApplicationACLsManager.class); + when( + mockAclsManager.checkAccess(UserGroupInformation.getCurrentUser(), + ApplicationAccessType.VIEW_APP, null, appId1)).thenReturn(true); + + ClientRMService rmService = new ClientRMService(rmContext, yarnScheduler, + null, mockAclsManager, null, null); + try { + RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); + GetApplicationReportRequest request = recordFactory + .newRecordInstance(GetApplicationReportRequest.class); + request.setApplicationId(appId1); + GetApplicationReportResponse response = + rmService.getApplicationReport(request); + ApplicationReport report = response.getApplicationReport(); + ApplicationResourceUsageReport usageReport = + report.getApplicationResourceUsageReport(); + Assert.assertEquals(10, usageReport.getMemorySeconds()); + Assert.assertEquals(3, usageReport.getVcoreSeconds()); + } finally { + rmService.close(); + } + } @Test public void testGetApplicationAttemptReport() throws YarnException, @@ -1065,11 +1099,11 @@ private ConcurrentHashMap getRMApps( ApplicationId applicationId3 = getApplicationId(3); YarnConfiguration config = new YarnConfiguration(); apps.put(applicationId1, getRMApp(rmContext, yarnScheduler, applicationId1, - config, "testqueue")); + config, "testqueue", 10, 3)); apps.put(applicationId2, getRMApp(rmContext, yarnScheduler, applicationId2, - config, "a")); + config, "a", 20, 2)); apps.put(applicationId3, getRMApp(rmContext, yarnScheduler, applicationId3, - config, "testqueue")); + config, "testqueue", 40, 5)); return apps; } @@ -1091,12 +1125,26 @@ private static ApplicationAttemptId getApplicationAttemptId(int id) { } private RMAppImpl getRMApp(RMContext rmContext, YarnScheduler yarnScheduler, - ApplicationId applicationId3, YarnConfiguration config, String queueName) { + ApplicationId applicationId3, YarnConfiguration config, String queueName, + final long memorySeconds, final long vcoreSeconds) { ApplicationSubmissionContext asContext = mock(ApplicationSubmissionContext.class); when(asContext.getMaxAppAttempts()).thenReturn(1); RMAppImpl app = spy(new RMAppImpl(applicationId3, rmContext, config, null, null, queueName, asContext, yarnScheduler, null, - System.currentTimeMillis(), "YARN", null)); + System.currentTimeMillis(), "YARN", null) { + @Override + public ApplicationReport createAndGetApplicationReport( + String clientUserName, boolean allowAccess) { + ApplicationReport report = super.createAndGetApplicationReport( + clientUserName, allowAccess); + ApplicationResourceUsageReport usageReport = + report.getApplicationResourceUsageReport(); + usageReport.setMemorySeconds(memorySeconds); + usageReport.setVcoreSeconds(vcoreSeconds); + report.setApplicationResourceUsageReport(usageReport); + return report; + } + }); ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance( ApplicationId.newInstance(123456, 1), 1); RMAppAttemptImpl rmAppAttemptImpl = spy(new 
RMAppAttemptImpl(attemptId, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java new file mode 100644 index 0000000000..d4ecc891f8 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestContainerResourceUsage.java @@ -0,0 +1,401 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager; + +import java.net.UnknownHostException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; + +import org.apache.commons.lang.time.DateUtils; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.api.records.ApplicationAccessType; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerState; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMetrics; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; +import org.apache.log4j.Level; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TestContainerResourceUsage { + + private YarnConfiguration conf; + + @Before + public void setup() throws UnknownHostException { + Logger rootLogger = LogManager.getRootLogger(); + rootLogger.setLevel(Level.DEBUG); + conf = new YarnConfiguration(); + UserGroupInformation.setConfiguration(conf); + conf.set(YarnConfiguration.RECOVERY_ENABLED, 
"true"); + conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, + YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS); + } + + @After + public void tearDown() { + } + + @Test (timeout = 60000) + public void testUsageWithOneAttemptAndOneContainer() throws Exception { + MockRM rm = new MockRM(conf); + rm.start(); + + MockNM nm = + new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService()); + nm.registerNode(); + + RMApp app0 = rm.submitApp(200); + + RMAppMetrics rmAppMetrics = app0.getRMAppMetrics(); + Assert.assertTrue( + "Before app submittion, memory seconds should have been 0 but was " + + rmAppMetrics.getMemorySeconds(), + rmAppMetrics.getMemorySeconds() == 0); + Assert.assertTrue( + "Before app submission, vcore seconds should have been 0 but was " + + rmAppMetrics.getVcoreSeconds(), + rmAppMetrics.getVcoreSeconds() == 0); + + RMAppAttempt attempt0 = app0.getCurrentAppAttempt(); + + nm.nodeHeartbeat(true); + MockAM am0 = rm.sendAMLaunched(attempt0.getAppAttemptId()); + am0.registerAppAttempt(); + + RMContainer rmContainer = + rm.getResourceScheduler() + .getRMContainer(attempt0.getMasterContainer().getId()); + + // Allow metrics to accumulate. + Thread.sleep(1000); + + rmAppMetrics = app0.getRMAppMetrics(); + Assert.assertTrue( + "While app is running, memory seconds should be >0 but is " + + rmAppMetrics.getMemorySeconds(), + rmAppMetrics.getMemorySeconds() > 0); + Assert.assertTrue( + "While app is running, vcore seconds should be >0 but is " + + rmAppMetrics.getVcoreSeconds(), + rmAppMetrics.getVcoreSeconds() > 0); + + MockRM.finishAMAndVerifyAppState(app0, rm, nm, am0); + + AggregateAppResourceUsage ru = calculateContainerResourceMetrics(rmContainer); + rmAppMetrics = app0.getRMAppMetrics(); + + Assert.assertEquals("Unexcpected MemorySeconds value", + ru.getMemorySeconds(), rmAppMetrics.getMemorySeconds()); + Assert.assertEquals("Unexpected VcoreSeconds value", + ru.getVcoreSeconds(), rmAppMetrics.getVcoreSeconds()); + + rm.stop(); + } + + @Test (timeout = 60000) + public void testUsageWithMultipleContainersAndRMRestart() throws Exception { + // Set max attempts to 1 so that when the first attempt fails, the app + // won't try to start a new one. + conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1); + MemoryRMStateStore memStore = new MemoryRMStateStore(); + memStore.init(conf); + + MockRM rm0 = new MockRM(conf, memStore); + rm0.start(); + MockNM nm = + new MockNM("127.0.0.1:1234", 65536, rm0.getResourceTrackerService()); + nm.registerNode(); + + RMApp app0 = rm0.submitApp(200); + + rm0.waitForState(app0.getApplicationId(), RMAppState.ACCEPTED); + RMAppAttempt attempt0 = app0.getCurrentAppAttempt(); + ApplicationAttemptId attemptId0 = attempt0.getAppAttemptId(); + rm0.waitForState(attemptId0, RMAppAttemptState.SCHEDULED); + + nm.nodeHeartbeat(true); + rm0.waitForState(attemptId0, RMAppAttemptState.ALLOCATED); + MockAM am0 = rm0.sendAMLaunched(attempt0.getAppAttemptId()); + am0.registerAppAttempt(); + + int NUM_CONTAINERS = 2; + am0.allocate("127.0.0.1" , 1000, NUM_CONTAINERS, + new ArrayList()); + nm.nodeHeartbeat(true); + List conts = am0.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers(); + while (conts.size() != NUM_CONTAINERS) { + nm.nodeHeartbeat(true); + conts.addAll(am0.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers()); + Thread.sleep(500); + } + + // launch the 2nd and 3rd containers. 
+ for (Container c : conts) { + nm.nodeHeartbeat(attempt0.getAppAttemptId(), + c.getId().getId(), ContainerState.RUNNING); + rm0.waitForState(nm, c.getId(), RMContainerState.RUNNING); + } + + // Get the RMContainers for all of the live containers, to be used later + // for metrics calculations and comparisons. + Collection rmContainers = + rm0.scheduler + .getSchedulerAppInfo(attempt0.getAppAttemptId()) + .getLiveContainers(); + + // Give the metrics time to accumulate. + Thread.sleep(1000); + + // Stop all non-AM containers + for (Container c : conts) { + if (c.getId().getId() == 1) continue; + nm.nodeHeartbeat(attempt0.getAppAttemptId(), + c.getId().getId(), ContainerState.COMPLETE); + rm0.waitForState(nm, c.getId(), RMContainerState.COMPLETED); + } + + // After all other containers have completed, manually complete the master + // container in order to trigger a save to the state store of the resource + // usage metrics. This will cause the attempt to fail, and, since the max + // attempt retries is 1, the app will also fail. This is intentional so + // that all containers will complete prior to saving. + ContainerId cId = ContainerId.newInstance(attempt0.getAppAttemptId(), 1); + nm.nodeHeartbeat(attempt0.getAppAttemptId(), + cId.getId(), ContainerState.COMPLETE); + rm0.waitForState(nm, cId, RMContainerState.COMPLETED); + + // Check that the container metrics match those from the app usage report. + long memorySeconds = 0; + long vcoreSeconds = 0; + for (RMContainer c : rmContainers) { + AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c); + memorySeconds += ru.getMemorySeconds(); + vcoreSeconds += ru.getVcoreSeconds(); + } + + RMAppMetrics metricsBefore = app0.getRMAppMetrics(); + Assert.assertEquals("Unexcpected MemorySeconds value", + memorySeconds, metricsBefore.getMemorySeconds()); + Assert.assertEquals("Unexpected VcoreSeconds value", + vcoreSeconds, metricsBefore.getVcoreSeconds()); + + // create new RM to represent RM restart. Load up the state store. + MockRM rm1 = new MockRM(conf, memStore); + rm1.start(); + RMApp app0After = + rm1.getRMContext().getRMApps().get(app0.getApplicationId()); + + // Compare container resource usage metrics from before and after restart. + RMAppMetrics metricsAfter = app0After.getRMAppMetrics(); + Assert.assertEquals("Vcore seconds were not the same after RM Restart", + metricsBefore.getVcoreSeconds(), metricsAfter.getVcoreSeconds()); + Assert.assertEquals("Memory seconds were not the same after RM Restart", + metricsBefore.getMemorySeconds(), metricsAfter.getMemorySeconds()); + + rm0.stop(); + rm0.close(); + rm1.stop(); + rm1.close(); + } + + @Test(timeout = 60000) + public void testUsageAfterAMRestartWithMultipleContainers() throws Exception { + amRestartTests(false); + } + + @Test(timeout = 60000) + public void testUsageAfterAMRestartKeepContainers() throws Exception { + amRestartTests(true); + } + + private void amRestartTests(boolean keepRunningContainers) + throws Exception { + MockRM rm = new MockRM(conf); + rm.start(); + + RMApp app = + rm.submitApp(200, "name", "user", + new HashMap(), false, "default", -1, + null, "MAPREDUCE", false, keepRunningContainers); + MockNM nm = + new MockNM("127.0.0.1:1234", 10240, rm.getResourceTrackerService()); + nm.registerNode(); + + MockAM am0 = MockRM.launchAndRegisterAM(app, rm, nm); + int NUM_CONTAINERS = 1; + // allocate NUM_CONTAINERS containers + am0.allocate("127.0.0.1", 1024, NUM_CONTAINERS, + new ArrayList()); + nm.nodeHeartbeat(true); + + // wait for containers to be allocated. 
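+    // Keep heartbeating and re-polling allocate() until all NUM_CONTAINERS
+    // requested containers have been handed out by the scheduler.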
+ List containers = + am0.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers(); + while (containers.size() != NUM_CONTAINERS) { + nm.nodeHeartbeat(true); + containers.addAll(am0.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers()); + Thread.sleep(200); + } + + // launch the 2nd container. + ContainerId containerId2 = + ContainerId.newInstance(am0.getApplicationAttemptId(), 2); + nm.nodeHeartbeat(am0.getApplicationAttemptId(), + containerId2.getId(), ContainerState.RUNNING); + rm.waitForState(nm, containerId2, RMContainerState.RUNNING); + + // Capture the containers here so the metrics can be calculated after the + // app has completed. + Collection rmContainers = + rm.scheduler + .getSchedulerAppInfo(am0.getApplicationAttemptId()) + .getLiveContainers(); + + // fail the first app attempt by sending CONTAINER_FINISHED event without + // registering. + ContainerId amContainerId = + app.getCurrentAppAttempt().getMasterContainer().getId(); + nm.nodeHeartbeat(am0.getApplicationAttemptId(), + amContainerId.getId(), ContainerState.COMPLETE); + am0.waitForState(RMAppAttemptState.FAILED); + + long memorySeconds = 0; + long vcoreSeconds = 0; + + // Calculate container usage metrics for first attempt. + if (keepRunningContainers) { + // Only calculate the usage for the one container that has completed. + for (RMContainer c : rmContainers) { + if (c.getContainerId().equals(amContainerId)) { + AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c); + memorySeconds += ru.getMemorySeconds(); + vcoreSeconds += ru.getVcoreSeconds(); + } else { + // The remaining container should be RUNNING. + Assert.assertTrue("After first attempt failed, remaining container " + + "should still be running. ", + c.getContainerState().equals(ContainerState.RUNNING)); + } + } + } else { + // If keepRunningContainers is false, all live containers should now + // be completed. Calculate the resource usage metrics for all of them. + for (RMContainer c : rmContainers) { + AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c); + memorySeconds += ru.getMemorySeconds(); + vcoreSeconds += ru.getVcoreSeconds(); + } + } + + // wait for app to start a new attempt. + rm.waitForState(app.getApplicationId(), RMAppState.ACCEPTED); + + // assert this is a new AM. + RMAppAttempt attempt2 = app.getCurrentAppAttempt(); + Assert.assertFalse(attempt2.getAppAttemptId() + .equals(am0.getApplicationAttemptId())); + + // launch the new AM + nm.nodeHeartbeat(true); + MockAM am1 = rm.sendAMLaunched(attempt2.getAppAttemptId()); + am1.registerAppAttempt(); + + // allocate NUM_CONTAINERS containers + am1.allocate("127.0.0.1", 1024, NUM_CONTAINERS, + new ArrayList()); + nm.nodeHeartbeat(true); + + // wait for containers to be allocated. + containers = + am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers(); + while (containers.size() != NUM_CONTAINERS) { + nm.nodeHeartbeat(true); + containers.addAll(am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers()); + Thread.sleep(200); + } + + rm.waitForState(app.getApplicationId(), RMAppState.RUNNING); + + // Capture running containers for later use by metrics calculations. 
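+    // These live containers belong to the second attempt; their usage is added
+    // to the totals already accumulated for the first attempt before comparing
+    // against the app-level metrics.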
+ rmContainers = rm.scheduler.getSchedulerAppInfo(attempt2.getAppAttemptId()) + .getLiveContainers(); + + // complete container by sending the container complete event which has + // earlier attempt's attemptId + amContainerId = app.getCurrentAppAttempt().getMasterContainer().getId(); + nm.nodeHeartbeat(am0.getApplicationAttemptId(), + amContainerId.getId(), ContainerState.COMPLETE); + + MockRM.finishAMAndVerifyAppState(app, rm, nm, am1); + + // Calculate container usage metrics for second attempt. + for (RMContainer c : rmContainers) { + AggregateAppResourceUsage ru = calculateContainerResourceMetrics(c); + memorySeconds += ru.getMemorySeconds(); + vcoreSeconds += ru.getVcoreSeconds(); + } + + RMAppMetrics rmAppMetrics = app.getRMAppMetrics(); + + Assert.assertEquals("Unexcpected MemorySeconds value", + memorySeconds, rmAppMetrics.getMemorySeconds()); + Assert.assertEquals("Unexpected VcoreSeconds value", + vcoreSeconds, rmAppMetrics.getVcoreSeconds()); + + rm.stop(); + return; + } + + private AggregateAppResourceUsage calculateContainerResourceMetrics( + RMContainer rmContainer) { + Resource resource = rmContainer.getContainer().getResource(); + long usedMillis = + rmContainer.getFinishTime() - rmContainer.getCreationTime(); + long memorySeconds = resource.getMemory() + * usedMillis / DateUtils.MILLIS_PER_SECOND; + long vcoreSeconds = resource.getVirtualCores() + * usedMillis / DateUtils.MILLIS_PER_SECOND; + return new AggregateAppResourceUsage(memorySeconds, vcoreSeconds); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java index ff60fcd7a9..15e45c4c36 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java @@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -170,7 +171,7 @@ public Set getRanNodes() { @Override public RMAppMetrics getRMAppMetrics() { - return new RMAppMetrics(Resource.newInstance(0, 0), 0, 0); + return new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, 0, 0); } } @@ -259,6 +260,22 @@ public int getMaxAppAttempts() { public Set getApplicationTags() { return null; } + + @Override + public ApplicationReport createAndGetApplicationReport( + String clientUserName, boolean allowAccess) { + ApplicationResourceUsageReport usageReport = + ApplicationResourceUsageReport.newInstance(0, 0, null, null, null, + 0, 0); + ApplicationReport report = ApplicationReport.newInstance( + getApplicationId(), appAttemptId, getUser(), getQueue(), + getName(), null, 0, null, null, getDiagnostics().toString(), + getTrackingUrl(), 
getStartTime(), getFinishTime(), + getFinalApplicationStatus(), usageReport , null, getProgress(), + type, null); + return report; + } + }; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java index 620ba9f232..2621dffcc7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java @@ -64,7 +64,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.AMRMTokenSecretManagerState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager; @@ -152,6 +154,8 @@ ContainerId storeAttempt(RMStateStore store, ApplicationAttemptId attemptId, SecretKey clientTokenMasterKey, TestDispatcher dispatcher) throws Exception { + RMAppAttemptMetrics mockRmAppAttemptMetrics = + mock(RMAppAttemptMetrics.class); Container container = new ContainerPBImpl(); container.setId(ConverterUtils.toContainerId(containerIdStr)); RMAppAttempt mockAttempt = mock(RMAppAttempt.class); @@ -160,6 +164,10 @@ ContainerId storeAttempt(RMStateStore store, ApplicationAttemptId attemptId, when(mockAttempt.getAMRMToken()).thenReturn(appToken); when(mockAttempt.getClientTokenMasterKey()) .thenReturn(clientTokenMasterKey); + when(mockAttempt.getRMAppAttemptMetrics()) + .thenReturn(mockRmAppAttemptMetrics); + when(mockRmAppAttemptMetrics.getAggregateAppResourceUsage()) + .thenReturn(new AggregateAppResourceUsage(0,0)); dispatcher.attemptId = attemptId; store.storeNewApplicationAttempt(mockAttempt); waitNotify(dispatcher); @@ -224,6 +232,8 @@ void testRMAppStateStore(RMStateStoreHelper stateStoreHelper) "container_1352994193343_0002_01_000001", null, null, dispatcher); RMApp mockRemovedApp = mock(RMApp.class); + RMAppAttemptMetrics mockRmAppAttemptMetrics = + mock(RMAppAttemptMetrics.class); HashMap attempts = new HashMap(); ApplicationSubmissionContext context = @@ -234,6 +244,10 @@ void testRMAppStateStore(RMStateStoreHelper stateStoreHelper) when(mockRemovedApp.getAppAttempts()).thenReturn(attempts); RMAppAttempt mockRemovedAttempt = mock(RMAppAttempt.class); when(mockRemovedAttempt.getAppAttemptId()).thenReturn(attemptIdRemoved); + when(mockRemovedAttempt.getRMAppAttemptMetrics()) + .thenReturn(mockRmAppAttemptMetrics); + when(mockRmAppAttemptMetrics.getAggregateAppResourceUsage()) + .thenReturn(new AggregateAppResourceUsage(0,0)); attempts.put(attemptIdRemoved, mockRemovedAttempt); 
store.removeApplication(mockRemovedApp); @@ -304,7 +318,7 @@ void testRMAppStateStore(RMStateStoreHelper stateStoreHelper) oldAttemptState.getAppAttemptCredentials(), oldAttemptState.getStartTime(), RMAppAttemptState.FINISHED, "myTrackingUrl", "attemptDiagnostics", - FinalApplicationStatus.SUCCEEDED, 100); + FinalApplicationStatus.SUCCEEDED, 100, 0, 0); store.updateApplicationAttemptState(newAttemptState); // test updating the state of an app/attempt whose initial state was not @@ -327,7 +341,7 @@ void testRMAppStateStore(RMStateStoreHelper stateStoreHelper) oldAttemptState.getAppAttemptCredentials(), oldAttemptState.getStartTime(), RMAppAttemptState.FINISHED, "myTrackingUrl", "attemptDiagnostics", - FinalApplicationStatus.SUCCEEDED, 111); + FinalApplicationStatus.SUCCEEDED, 111, 0, 0); store.updateApplicationAttemptState(dummyAttempt); // let things settle down diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java index 3c871df913..5874b5d716 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java @@ -19,8 +19,10 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp; import static org.mockito.Matchers.any; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -38,6 +40,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.yarn.MockApps; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; @@ -61,6 +64,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; @@ -74,6 +78,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.mockito.ArgumentCaptor; +import org.mockito.Matchers; @RunWith(value = Parameterized.class) @@ -189,7 +194,7 @@ public void setUp() throws Exception { AMLivelinessMonitor amFinishingMonitor = mock(AMLivelinessMonitor.class); store = mock(RMStateStore.class); writer = mock(RMApplicationHistoryWriter.class); - this.rmContext = + RMContext realRMContext = new 
RMContextImpl(rmDispatcher, containerAllocationExpirer, amLivelinessMonitor, amFinishingMonitor, null, new AMRMTokenSecretManager(conf, this.rmContext), @@ -197,7 +202,14 @@ null, new AMRMTokenSecretManager(conf, this.rmContext), new NMTokenSecretManagerInRM(conf), new ClientToAMTokenSecretManagerInRM(), writer); - ((RMContextImpl)rmContext).setStateStore(store); + ((RMContextImpl)realRMContext).setStateStore(store); + + this.rmContext = spy(realRMContext); + + ResourceScheduler resourceScheduler = mock(ResourceScheduler.class); + doReturn(null).when(resourceScheduler) + .getAppResourceUsageReport((ApplicationAttemptId)Matchers.any()); + doReturn(resourceScheduler).when(rmContext).getScheduler(); rmDispatcher.register(RMAppAttemptEventType.class, new TestApplicationAttemptEventDispatcher(this.rmContext)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index efcecd96e3..ae318b5476 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -25,6 +25,7 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assume.assumeTrue; import static org.mockito.Matchers.any; +import static org.mockito.Matchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; @@ -49,6 +50,7 @@ import org.apache.hadoop.yarn.MockApps; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -87,6 +89,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; @@ -107,6 +111,8 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.mockito.ArgumentCaptor; +import org.mockito.Matchers; +import org.mockito.Mockito; @RunWith(value = Parameterized.class) public class TestRMAppAttemptTransitions { @@ -120,7 +126,9 @@ public class TestRMAppAttemptTransitions { private boolean isSecurityEnabled; private RMContext rmContext; + private RMContext spyRMContext; private YarnScheduler scheduler; + private 
ResourceScheduler resourceScheduler; private ApplicationMasterService masterService; private ApplicationMasterLauncher applicationMasterLauncher; private AMLivelinessMonitor amLivelinessMonitor; @@ -262,7 +270,20 @@ public void setUp() throws Exception { ApplicationId applicationId = MockApps.newAppID(appId++); ApplicationAttemptId applicationAttemptId = ApplicationAttemptId.newInstance(applicationId, 0); - + + resourceScheduler = mock(ResourceScheduler.class); + + ApplicationResourceUsageReport appResUsgRpt = + mock(ApplicationResourceUsageReport.class); + when(appResUsgRpt.getMemorySeconds()).thenReturn(0L); + when(appResUsgRpt.getVcoreSeconds()).thenReturn(0L); + when(resourceScheduler + .getAppResourceUsageReport((ApplicationAttemptId)Matchers.any())) + .thenReturn(appResUsgRpt); + spyRMContext = spy(rmContext); + Mockito.doReturn(resourceScheduler).when(spyRMContext).getScheduler(); + + final String user = MockApps.newUserName(); final String queue = MockApps.newQueue(); submissionContext = mock(ApplicationSubmissionContext.class); @@ -278,17 +299,18 @@ public void setUp() throws Exception { application = mock(RMAppImpl.class); applicationAttempt = - new RMAppAttemptImpl(applicationAttemptId, rmContext, scheduler, + new RMAppAttemptImpl(applicationAttemptId, spyRMContext, scheduler, masterService, submissionContext, new Configuration(), false); when(application.getCurrentAppAttempt()).thenReturn(applicationAttempt); when(application.getApplicationId()).thenReturn(applicationId); - + spyRMContext.getRMApps().put(application.getApplicationId(), application); + testAppAttemptNewState(); } @After public void tearDown() throws Exception { - ((AsyncDispatcher)this.rmContext.getDispatcher()).stop(); + ((AsyncDispatcher)this.spyRMContext.getDispatcher()).stop(); } @@ -698,6 +720,46 @@ private void sendAttemptUpdateSavedEvent(RMAppAttempt applicationAttempt) { RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED)); } + @Test + public void testUsageReport() { + // scheduler has info on running apps + ApplicationAttemptId attemptId = applicationAttempt.getAppAttemptId(); + ApplicationResourceUsageReport appResUsgRpt = + mock(ApplicationResourceUsageReport.class); + when(appResUsgRpt.getMemorySeconds()).thenReturn(123456L); + when(appResUsgRpt.getVcoreSeconds()).thenReturn(55544L); + when(scheduler.getAppResourceUsageReport(any(ApplicationAttemptId.class))) + .thenReturn(appResUsgRpt); + + // start and finish the attempt + Container amContainer = allocateApplicationAttempt(); + launchApplicationAttempt(amContainer); + runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl", false); + applicationAttempt.handle(new RMAppAttemptUnregistrationEvent(attemptId, + "", FinalApplicationStatus.SUCCEEDED, "")); + + // expect usage stats to come from the scheduler report + ApplicationResourceUsageReport report = + applicationAttempt.getApplicationResourceUsageReport(); + Assert.assertEquals(123456L, report.getMemorySeconds()); + Assert.assertEquals(55544L, report.getVcoreSeconds()); + + // finish app attempt and remove it from scheduler + when(appResUsgRpt.getMemorySeconds()).thenReturn(223456L); + when(appResUsgRpt.getVcoreSeconds()).thenReturn(75544L); + sendAttemptUpdateSavedEvent(applicationAttempt); + applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( + attemptId, + ContainerStatus.newInstance( + amContainer.getId(), ContainerState.COMPLETE, "", 0))); + + when(scheduler.getSchedulerAppInfo(eq(attemptId))).thenReturn(null); + + report = 
applicationAttempt.getApplicationResourceUsageReport(); + Assert.assertEquals(223456, report.getMemorySeconds()); + Assert.assertEquals(75544, report.getVcoreSeconds()); + } + @Test public void testUnmanagedAMUnexpectedRegistration() { unmanagedAM = true; @@ -1243,7 +1305,7 @@ public void testFailedToFailed() { public void testContainersCleanupForLastAttempt() { // create a failed attempt. applicationAttempt = - new RMAppAttemptImpl(applicationAttempt.getAppAttemptId(), rmContext, + new RMAppAttemptImpl(applicationAttempt.getAppAttemptId(), spyRMContext, scheduler, masterService, submissionContext, new Configuration(), true); when(submissionContext.getKeepContainersAcrossApplicationAttempts()) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java index 44f8381b48..9862cfebf2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java @@ -23,10 +23,13 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import java.util.ArrayList; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -59,6 +62,8 @@ import org.junit.Assert; import org.junit.Test; import org.mockito.ArgumentCaptor; +import org.mockito.Matchers; +import org.mockito.Mockito; @SuppressWarnings({ "unchecked", "rawtypes" }) public class TestRMContainerImpl { @@ -86,12 +91,18 @@ public void testReleaseWhileRunning() { Container container = BuilderUtils.newContainer(containerId, nodeId, "host:3465", resource, priority, null); + ConcurrentMap rmApps = + spy(new ConcurrentHashMap()); + RMApp rmApp = mock(RMApp.class); + when(rmApp.getRMAppAttempt((ApplicationAttemptId)Matchers.any())).thenReturn(null); + Mockito.doReturn(rmApp).when(rmApps).get((ApplicationId)Matchers.any()); RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class); RMContext rmContext = mock(RMContext.class); when(rmContext.getDispatcher()).thenReturn(drainDispatcher); when(rmContext.getContainerAllocationExpirer()).thenReturn(expirer); when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer); + when(rmContext.getRMApps()).thenReturn(rmApps); RMContainer rmContainer = new RMContainerImpl(container, appAttemptId, nodeId, "user", rmContext); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index d5eb93393e..083cb71acb 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -39,12 +39,15 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import org.junit.Assert; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -59,6 +62,7 @@ import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; @@ -79,6 +83,8 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.mockito.Matchers; +import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -90,6 +96,7 @@ public class TestLeafQueue { RecordFactoryProvider.getRecordFactory(null); RMContext rmContext; + RMContext spyRMContext; CapacityScheduler cs; CapacitySchedulerConfiguration csConf; CapacitySchedulerContext csContext; @@ -107,6 +114,14 @@ public void setUp() throws Exception { CapacityScheduler spyCs = new CapacityScheduler(); cs = spy(spyCs); rmContext = TestUtils.getMockRMContext(); + spyRMContext = spy(rmContext); + + ConcurrentMap spyApps = + spy(new ConcurrentHashMap()); + RMApp rmApp = mock(RMApp.class); + when(rmApp.getRMAppAttempt((ApplicationAttemptId)Matchers.any())).thenReturn(null); + Mockito.doReturn(rmApp).when(spyApps).get((ApplicationId)Matchers.any()); + when(spyRMContext.getRMApps()).thenReturn(spyApps); csConf = new CapacitySchedulerConfiguration(); @@ -143,7 +158,7 @@ public void setUp() throws Exception { queues, queues, TestUtils.spyHook); - cs.setRMContext(rmContext); + cs.setRMContext(spyRMContext); cs.init(csConf); cs.start(); } @@ -280,14 +295,14 @@ public void testSingleQueueOneUserMetrics() throws Exception { TestUtils.getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); a.submitApplicationAttempt(app_1, user_0); // same user @@ -329,14 +344,14 @@ public void testUserQueueAcl() throws Exception { final ApplicationAttemptId appAttemptId_0 = TestUtils 
.getMockApplicationAttemptId(0, 1); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_d, d, null, - rmContext); + spyRMContext); d.submitApplicationAttempt(app_0, user_d); // Attempt the same application again final ApplicationAttemptId appAttemptId_1 = TestUtils .getMockApplicationAttemptId(0, 2); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_d, d, null, - rmContext); + spyRMContext); d.submitApplicationAttempt(app_1, user_d); // same user } @@ -373,7 +388,7 @@ public void testAppAttemptMetrics() throws Exception { final ApplicationAttemptId appAttemptId_1 = TestUtils .getMockApplicationAttemptId(0, 2); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, null, - rmContext); + spyRMContext); a.submitApplicationAttempt(app_1, user_0); // same user assertEquals(1, a.getMetrics().getAppsSubmitted()); @@ -411,14 +426,14 @@ public void testSingleQueueWithOneUser() throws Exception { TestUtils.getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); a.submitApplicationAttempt(app_1, user_0); // same user @@ -545,21 +560,21 @@ public void testUserLimits() throws Exception { TestUtils.getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, - a.getActiveUsersManager(), rmContext); + a.getActiveUsersManager(), spyRMContext); a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, - a.getActiveUsersManager(), rmContext); + a.getActiveUsersManager(), spyRMContext); a.submitApplicationAttempt(app_1, user_0); // same user final ApplicationAttemptId appAttemptId_2 = TestUtils.getMockApplicationAttemptId(2, 0); FiCaSchedulerApp app_2 = new FiCaSchedulerApp(appAttemptId_2, user_1, a, - a.getActiveUsersManager(), rmContext); + a.getActiveUsersManager(), spyRMContext); a.submitApplicationAttempt(app_2, user_1); // Setup some nodes @@ -639,21 +654,21 @@ public void testHeadroomWithMaxCap() throws Exception { TestUtils.getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, - a.getActiveUsersManager(), rmContext); + a.getActiveUsersManager(), spyRMContext); a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, - a.getActiveUsersManager(), rmContext); + a.getActiveUsersManager(), spyRMContext); a.submitApplicationAttempt(app_1, user_0); // same user final ApplicationAttemptId appAttemptId_2 = TestUtils.getMockApplicationAttemptId(2, 0); FiCaSchedulerApp app_2 = new FiCaSchedulerApp(appAttemptId_2, user_1, a, - a.getActiveUsersManager(), rmContext); + a.getActiveUsersManager(), spyRMContext); a.submitApplicationAttempt(app_2, user_1); // Setup some nodes @@ -750,28 +765,28 @@ public void testSingleQueueWithMultipleUsers() throws Exception { TestUtils.getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new 
FiCaSchedulerApp(appAttemptId_0, user_0, a, - a.getActiveUsersManager(), rmContext); + a.getActiveUsersManager(), spyRMContext); a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, - a.getActiveUsersManager(), rmContext); + a.getActiveUsersManager(), spyRMContext); a.submitApplicationAttempt(app_1, user_0); // same user final ApplicationAttemptId appAttemptId_2 = TestUtils.getMockApplicationAttemptId(2, 0); FiCaSchedulerApp app_2 = new FiCaSchedulerApp(appAttemptId_2, user_1, a, - a.getActiveUsersManager(), rmContext); + a.getActiveUsersManager(), spyRMContext); a.submitApplicationAttempt(app_2, user_1); final ApplicationAttemptId appAttemptId_3 = TestUtils.getMockApplicationAttemptId(3, 0); FiCaSchedulerApp app_3 = new FiCaSchedulerApp(appAttemptId_3, user_2, a, - a.getActiveUsersManager(), rmContext); + a.getActiveUsersManager(), spyRMContext); a.submitApplicationAttempt(app_3, user_2); // Setup some nodes @@ -935,14 +950,14 @@ public void testReservation() throws Exception { TestUtils.getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_1, a, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); a.submitApplicationAttempt(app_1, user_1); // Setup some nodes @@ -1043,14 +1058,14 @@ public void testStolenReservedContainer() throws Exception { TestUtils.getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_1, a, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); a.submitApplicationAttempt(app_1, user_1); // Setup some nodes @@ -1150,14 +1165,14 @@ public void testReservationExchange() throws Exception { TestUtils.getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_1, a, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); a.submitApplicationAttempt(app_1, user_1); // Setup some nodes @@ -1277,7 +1292,7 @@ public void testLocalityScheduling() throws Exception { TestUtils.getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = spy(new FiCaSchedulerApp(appAttemptId_0, user_0, a, - mock(ActiveUsersManager.class), rmContext)); + mock(ActiveUsersManager.class), spyRMContext)); a.submitApplicationAttempt(app_0, user_0); // Setup some nodes and racks @@ -1418,7 +1433,7 @@ public void testApplicationPriorityScheduling() throws Exception { TestUtils.getMockApplicationAttemptId(0, 0); 
FiCaSchedulerApp app_0 = spy(new FiCaSchedulerApp(appAttemptId_0, user_0, a, - mock(ActiveUsersManager.class), rmContext)); + mock(ActiveUsersManager.class), spyRMContext)); a.submitApplicationAttempt(app_0, user_0); // Setup some nodes and racks @@ -1549,7 +1564,7 @@ public void testSchedulingConstraints() throws Exception { TestUtils.getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = spy(new FiCaSchedulerApp(appAttemptId_0, user_0, a, - mock(ActiveUsersManager.class), rmContext)); + mock(ActiveUsersManager.class), spyRMContext)); a.submitApplicationAttempt(app_0, user_0); // Setup some nodes and racks @@ -1652,21 +1667,21 @@ public void testActivateApplicationAfterQueueRefresh() throws Exception { TestUtils.getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_e, e, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); e.submitApplicationAttempt(app_0, user_e); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_e, e, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); e.submitApplicationAttempt(app_1, user_e); // same user final ApplicationAttemptId appAttemptId_2 = TestUtils.getMockApplicationAttemptId(2, 0); FiCaSchedulerApp app_2 = new FiCaSchedulerApp(appAttemptId_2, user_e, e, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); e.submitApplicationAttempt(app_2, user_e); // same user // before reinitialization @@ -1730,21 +1745,21 @@ public void testActivateApplicationByUpdatingClusterResource() TestUtils.getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_e, e, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); e.submitApplicationAttempt(app_0, user_e); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_e, e, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); e.submitApplicationAttempt(app_1, user_e); // same user final ApplicationAttemptId appAttemptId_2 = TestUtils.getMockApplicationAttemptId(2, 0); FiCaSchedulerApp app_2 = new FiCaSchedulerApp(appAttemptId_2, user_e, e, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); e.submitApplicationAttempt(app_2, user_e); // same user // before updating cluster resource @@ -1807,14 +1822,14 @@ public void testLocalityConstraints() throws Exception { TestUtils.getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = spy(new FiCaSchedulerApp(appAttemptId_0, user_0, a, - mock(ActiveUsersManager.class), rmContext)); + mock(ActiveUsersManager.class), spyRMContext)); a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = spy(new FiCaSchedulerApp(appAttemptId_1, user_0, a, - mock(ActiveUsersManager.class), rmContext)); + mock(ActiveUsersManager.class), spyRMContext)); a.submitApplicationAttempt(app_1, user_0); // Setup some nodes and racks @@ -2062,14 +2077,14 @@ public void testAllocateContainerOnNodeWithoutOffSwitchSpecified() TestUtils.getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, - mock(ActiveUsersManager.class), 
rmContext); + mock(ActiveUsersManager.class), spyRMContext); a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, - mock(ActiveUsersManager.class), rmContext); + mock(ActiveUsersManager.class), spyRMContext); a.submitApplicationAttempt(app_1, user_0); // same user // Setup some nodes diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java index 61def878b2..bd7f1bdaf6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java @@ -162,7 +162,7 @@ protected ApplicationAttemptId createSchedulingRequest( RMAppAttempt rmAppAttempt = mock(RMAppAttempt.class); when(rmApp.getCurrentAppAttempt()).thenReturn(rmAppAttempt); when(rmAppAttempt.getRMAppAttemptMetrics()).thenReturn( - new RMAppAttemptMetrics(id)); + new RMAppAttemptMetrics(id, resourceManager.getRMContext())); resourceManager.getRMContext().getRMApps() .put(id.getApplicationId(), rmApp); return id; @@ -183,7 +183,7 @@ protected ApplicationAttemptId createSchedulingRequest(String queueId, RMAppAttempt rmAppAttempt = mock(RMAppAttempt.class); when(rmApp.getCurrentAppAttempt()).thenReturn(rmAppAttempt); when(rmAppAttempt.getRMAppAttemptMetrics()).thenReturn( - new RMAppAttemptMetrics(id)); + new RMAppAttemptMetrics(id,resourceManager.getRMContext())); resourceManager.getRMContext().getRMApps() .put(id.getApplicationId(), rmApp); return id; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java index 111bf47d2b..f07cb8d2f1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebAppFairScheduler.java @@ -136,7 +136,7 @@ private static RMContext mockRMContext(List states) { MockRMApp app = new MockRMApp(i, i, state) { @Override public RMAppMetrics getRMAppMetrics() { - return new RMAppMetrics(Resource.newInstance(0, 0), 0, 0); + return new RMAppMetrics(Resource.newInstance(0, 0), 0, 0, 0, 0); } @Override public YarnApplicationState createApplicationState() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java index b009bfa2e8..3701dd0c7c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java @@ -1322,7 +1322,7 @@ public void verifyAppInfo(JSONObject info, RMApp app) throws JSONException, Exception { // 28 because trackingUrl not assigned yet - assertEquals("incorrect number of elements", 24, info.length()); + assertEquals("incorrect number of elements", 26, info.length()); verifyAppInfoGeneric(app, info.getString("id"), info.getString("user"), info.getString("name"), info.getString("applicationType"), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ResourceManagerRest.apt.vm b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ResourceManagerRest.apt.vm index 9609ba39de..0fd5b242a1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ResourceManagerRest.apt.vm +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ResourceManagerRest.apt.vm @@ -1197,7 +1197,9 @@ ResourceManager REST API's. "queue" : "default", "allocatedMB" : 0, "allocatedVCores" : 0, - "runningContainers" : 0 + "runningContainers" : 0, + "memorySeconds" : 151730, + "vcoreSeconds" : 103 }, { "finishedTime" : 1326815789546, @@ -1218,7 +1220,9 @@ ResourceManager REST API's. "queue" : "default", "allocatedMB" : 0, "allocatedVCores" : 0, - "runningContainers" : 1 + "runningContainers" : 1, + "memorySeconds" : 640064, + "vcoreSeconds" : 442 } ] } @@ -1271,6 +1275,8 @@ _01_000001 0 0 0 + 151730 + 103 application_1326815542473_0002 @@ -1293,6 +1299,8 @@ _01_000001 0 0 0 + 640064 + 442 +---+ @@ -1491,6 +1499,10 @@ _01_000001 +---------------------------------------------------------------+ | runningContainers | int | The number of containers currently running for the application | +---------------------------------------------------------------+ +| memorySeconds | long | The amount of memory the application has allocated (megabyte-seconds) | +*---------------+--------------+--------------------------------+ +| vcoreSeconds | long | The amount of CPU resources the application has allocated (virtual core-seconds) | +*---------------+--------------+--------------------------------+ ** Response Examples @@ -1532,7 +1544,9 @@ _01_000001 "elapsedTime" : 446748, "diagnostics" : "", "trackingUrl" : "http://host.domain.com:8088/proxy/application_1326821518301_0005/jobhistory/job/job_1326821518301_5_5", - "queue" : "a1" + "queue" : "a1", + "memorySeconds" : 151730, + "vcoreSeconds" : 103 } } +---+ @@ -1576,6 +1590,8 @@ _01_000001 446748 http://host.domain.com:8042/node/containerlogs/container_1326821518301_0005_01_000001 host.domain.com:8042 + 151730 + 103 +---+ From 4be95175cdb58ff12a27ab443d609d3b46da7bfa Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Wed, 10 Sep 2014 19:22:52 -0700 Subject: [PATCH 13/13] YARN-2440. Enabled Nodemanagers to limit the aggregate cpu usage across all containers to a preconfigured limit. Contributed by Varun Vasudev. 
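For orientation before the diff: the core of this change is the translation from "cores available to YARN containers" into the cgroups cpu.cfs_period_us / cpu.cfs_quota_us pair. Below is a minimal standalone sketch of that arithmetic, mirroring the constants and branch structure of the getOverallLimits() method added in the patch; it is illustrative only and not part of the change itself.

// Sketch of the cgroups CPU-cap math: given the number of cores YARN may use,
// derive the cfs_period_us / cfs_quota_us pair to write into the cpu controller.
public class CpuLimitSketch {
  private static final int MAX_QUOTA_US = 1000 * 1000; // one second
  private static final int MIN_PERIOD_US = 1000;       // kernel minimum for cfs_period_us

  // Returns {periodUs, quotaUs}; a quota of -1 means "no CPU cap".
  static int[] overallLimits(float yarnProcessors) {
    if (yarnProcessors < 0.01f) {
      throw new IllegalArgumentException("Number of processors can't be <= 0.");
    }
    int quotaUs = MAX_QUOTA_US;
    int periodUs = (int) (MAX_QUOTA_US / yarnProcessors);
    if (yarnProcessors < 1.0f) {
      // Less than one full core: keep the period at the maximum and shrink the quota.
      periodUs = MAX_QUOTA_US;
      quotaUs = (int) (periodUs * yarnProcessors);
      if (quotaUs < MIN_PERIOD_US) {
        quotaUs = MIN_PERIOD_US;
      }
    }
    if (periodUs < MIN_PERIOD_US) {
      // cfs_period_us cannot go below 1000us; give up on capping and use all CPU.
      periodUs = MAX_QUOTA_US;
      quotaUs = -1;
    }
    return new int[] { periodUs, quotaUs };
  }

  public static void main(String[] args) {
    // 4 physical cores with yarn.nodemanager.resource.percentage-physical-cpu-limit=75
    float yarnProcessors = 4 * 75 / 100.0f;   // 3.0 cores usable by containers
    int[] limits = overallLimits(yarnProcessors);
    // Prints period=333333 quota=1000000, i.e. roughly 3x the period per scheduling window.
    System.out.println("period=" + limits[0] + " quota=" + limits[1]);
  }
}

With the default 100% limit, init() leaves the quota at -1 (no cap), which is what the added testInit() case verifies.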
--- hadoop-yarn-project/CHANGES.txt | 3 + .../hadoop/yarn/conf/YarnConfiguration.java | 6 + .../src/main/resources/yarn-default.xml | 16 +- .../util/CgroupsLCEResourcesHandler.java | 94 ++++++++++- .../util/NodeManagerHardwareUtils.java | 79 ++++++++++ .../util/TestCgroupsLCEResourcesHandler.java | 147 +++++++++++++++++- .../util/TestNodeManagerHardwareUtils.java | 72 +++++++++ 7 files changed, 408 insertions(+), 9 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index b28f8f823f..6a871a58a8 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -70,6 +70,9 @@ Release 2.6.0 - UNRELEASED YARN-415. Capture aggregate memory allocation at the app-level for chargeback. (Eric Payne & Andrey Klochkov via jianhe) + YARN-2440. Enabled Nodemanagers to limit the aggregate cpu usage across all + containers to a preconfigured limit. (Varun Vasudev via vinodkv) + IMPROVEMENTS YARN-2197. Add a link to YARN CHANGES.txt in the left side of doc diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 7b7511d20b..7c71a1717f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -723,6 +723,12 @@ public class YarnConfiguration extends Configuration { /** Number of Virtual CPU Cores which can be allocated for containers.*/ public static final String NM_VCORES = NM_PREFIX + "resource.cpu-vcores"; public static final int DEFAULT_NM_VCORES = 8; + + /** Percentage of overall CPU which can be allocated for containers. */ + public static final String NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT = + NM_PREFIX + "resource.percentage-physical-cpu-limit"; + public static final int DEFAULT_NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT = + 100; /** NM Webapp address.**/ public static final String NM_WEBAPP_ADDRESS = NM_PREFIX + "webapp.address"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 9b4a90f479..04e458cd72 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -871,12 +871,24 @@ - Number of CPU cores that can be allocated - for containers. + Number of vcores that can be allocated + for containers. This is used by the RM scheduler when allocating + resources for containers. This is not used to limit the number of + physical cores used by YARN containers. yarn.nodemanager.resource.cpu-vcores 8 + + Percentage of CPU that can be allocated + for containers. This setting allows users to limit the amount of + CPU that YARN containers use. Currently functional only + on Linux using cgroups. 
The default is to use 100% of CPU. + + yarn.nodemanager.resource.percentage-physical-cpu-limit + 100 + + NM Webapp address. yarn.nodemanager.webapp.address diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java index d5bd22540f..0b6c2ac60b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/CgroupsLCEResourcesHandler.java @@ -33,6 +33,7 @@ import java.util.regex.Pattern; import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.io.FileUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -42,6 +43,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor; import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; import org.apache.hadoop.yarn.util.SystemClock; public class CgroupsLCEResourcesHandler implements LCEResourcesHandler { @@ -59,7 +61,11 @@ public class CgroupsLCEResourcesHandler implements LCEResourcesHandler { private final String MTAB_FILE = "/proc/mounts"; private final String CGROUPS_FSTYPE = "cgroup"; private final String CONTROLLER_CPU = "cpu"; + private final String CPU_PERIOD_US = "cfs_period_us"; + private final String CPU_QUOTA_US = "cfs_quota_us"; private final int CPU_DEFAULT_WEIGHT = 1024; // set by kernel + private final int MAX_QUOTA_US = 1000 * 1000; + private final int MIN_PERIOD_US = 1000; private final Map controllerPaths; // Controller -> path private long deleteCgroupTimeout; @@ -106,8 +112,15 @@ void initConfig() throws IOException { } public void init(LinuxContainerExecutor lce) throws IOException { + this.init(lce, + ResourceCalculatorPlugin.getResourceCalculatorPlugin(null, conf)); + } + + @VisibleForTesting + void init(LinuxContainerExecutor lce, ResourceCalculatorPlugin plugin) + throws IOException { initConfig(); - + // mount cgroups if requested if (cgroupMount && cgroupMountPath != null) { ArrayList cgroupKVs = new ArrayList(); @@ -117,8 +130,74 @@ public void init(LinuxContainerExecutor lce) throws IOException { } initializeControllerPaths(); + + // cap overall usage to the number of cores allocated to YARN + float yarnProcessors = + NodeManagerHardwareUtils.getContainersCores(plugin, conf); + int systemProcessors = plugin.getNumProcessors(); + if (systemProcessors != (int) yarnProcessors) { + LOG.info("YARN containers restricted to " + yarnProcessors + " cores"); + int[] limits = getOverallLimits(yarnProcessors); + updateCgroup(CONTROLLER_CPU, "", CPU_PERIOD_US, String.valueOf(limits[0])); + updateCgroup(CONTROLLER_CPU, "", CPU_QUOTA_US, String.valueOf(limits[1])); + } else if (cpuLimitsExist()) { + LOG.info("Removing CPU constraints for YARN containers."); + updateCgroup(CONTROLLER_CPU, "", CPU_QUOTA_US, String.valueOf(-1)); + } } + boolean cpuLimitsExist() throws IOException { + String path = pathForCgroup(CONTROLLER_CPU, ""); + File 
quotaFile = new File(path, CONTROLLER_CPU + "." + CPU_QUOTA_US); + if (quotaFile.exists()) { + String contents = FileUtils.readFileToString(quotaFile, "UTF-8"); + int quotaUS = Integer.parseInt(contents.trim()); + if (quotaUS != -1) { + return true; + } + } + return false; + } + + @VisibleForTesting + int[] getOverallLimits(float yarnProcessors) { + + int[] ret = new int[2]; + + if (yarnProcessors < 0.01f) { + throw new IllegalArgumentException("Number of processors can't be <= 0."); + } + + int quotaUS = MAX_QUOTA_US; + int periodUS = (int) (MAX_QUOTA_US / yarnProcessors); + if (yarnProcessors < 1.0f) { + periodUS = MAX_QUOTA_US; + quotaUS = (int) (periodUS * yarnProcessors); + if (quotaUS < MIN_PERIOD_US) { + LOG + .warn("The quota calculated for the cgroup was too low. The minimum value is " + + MIN_PERIOD_US + ", calculated value is " + quotaUS + + ". Setting quota to minimum value."); + quotaUS = MIN_PERIOD_US; + } + } + + // cfs_period_us can't be less than 1000 microseconds + // if the value of periodUS is less than 1000, we can't really use cgroups + // to limit cpu + if (periodUS < MIN_PERIOD_US) { + LOG + .warn("The period calculated for the cgroup was too low. The minimum value is " + + MIN_PERIOD_US + ", calculated value is " + periodUS + + ". Using all available CPU."); + periodUS = MAX_QUOTA_US; + quotaUS = -1; + } + + ret[0] = periodUS; + ret[1] = quotaUS; + return ret; + } boolean isCpuWeightEnabled() { return this.cpuWeightEnabled; @@ -274,7 +353,7 @@ private Map> parseMtab() throws IOException { BufferedReader in = null; try { - in = new BufferedReader(new FileReader(new File(MTAB_FILE))); + in = new BufferedReader(new FileReader(new File(getMtabFileName()))); for (String str = in.readLine(); str != null; str = in.readLine()) { @@ -292,13 +371,13 @@ private Map> parseMtab() throws IOException { } } } catch (IOException e) { - throw new IOException("Error while reading " + MTAB_FILE, e); + throw new IOException("Error while reading " + getMtabFileName(), e); } finally { // Close the streams try { in.close(); } catch (IOException e2) { - LOG.warn("Error closing the stream: " + MTAB_FILE, e2); + LOG.warn("Error closing the stream: " + getMtabFileName(), e2); } } @@ -334,7 +413,12 @@ private void initializeControllerPaths() throws IOException { } } else { throw new IOException("Not able to enforce cpu weights; cannot find " - + "cgroup for cpu controller in " + MTAB_FILE); + + "cgroup for cpu controller in " + getMtabFileName()); } } + + @VisibleForTesting + String getMtabFileName() { + return MTAB_FILE; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java new file mode 100644 index 0000000000..07cf698429 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerHardwareUtils.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.util; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; + +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class NodeManagerHardwareUtils { + + /** + * + * Returns the fraction of CPU cores that should be used for YARN containers. + * The number is derived based on various configuration params such as + * YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT + * + * @param conf + * - Configuration object + * @return Fraction of CPU cores to be used for YARN containers + */ + public static float getContainersCores(Configuration conf) { + ResourceCalculatorPlugin plugin = + ResourceCalculatorPlugin.getResourceCalculatorPlugin(null, conf); + return NodeManagerHardwareUtils.getContainersCores(plugin, conf); + } + + /** + * + * Returns the fraction of CPU cores that should be used for YARN containers. + * The number is derived based on various configuration params such as + * YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT + * + * @param plugin + * - ResourceCalculatorPlugin object to determine hardware specs + * @param conf + * - Configuration object + * @return Fraction of CPU cores to be used for YARN containers + */ + public static float getContainersCores(ResourceCalculatorPlugin plugin, + Configuration conf) { + int numProcessors = plugin.getNumProcessors(); + int nodeCpuPercentage = + Math.min(conf.getInt( + YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT, + YarnConfiguration.DEFAULT_NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT), + 100); + nodeCpuPercentage = Math.max(0, nodeCpuPercentage); + + if (nodeCpuPercentage == 0) { + String message = + "Illegal value for " + + YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT + + ". 
Value cannot be less than or equal to 0."; + throw new IllegalArgumentException(message); + } + + return (nodeCpuPercentage * numProcessors) / 100.0f; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestCgroupsLCEResourcesHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestCgroupsLCEResourcesHandler.java index 611045ea2d..4506898804 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestCgroupsLCEResourcesHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestCgroupsLCEResourcesHandler.java @@ -17,13 +17,18 @@ */ package org.apache.hadoop.yarn.server.nodemanager.util; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor; +import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; import org.junit.Assert; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.util.Clock; import org.junit.Test; +import org.mockito.Mockito; -import java.io.File; -import java.io.FileOutputStream; +import java.io.*; +import java.util.List; +import java.util.Scanner; import java.util.UUID; import java.util.concurrent.CountDownLatch; @@ -70,4 +75,142 @@ public void run() { Assert.assertFalse(handler.deleteCgroup(file.getPath())); } + static class MockLinuxContainerExecutor extends LinuxContainerExecutor { + @Override + public void mountCgroups(List x, String y) { + } + } + + static class CustomCgroupsLCEResourceHandler extends + CgroupsLCEResourcesHandler { + + String mtabFile; + int[] limits = new int[2]; + + @Override + int[] getOverallLimits(float x) { + return limits; + } + + void setMtabFile(String file) { + mtabFile = file; + } + + @Override + String getMtabFileName() { + return mtabFile; + } + } + + @Test + public void testInit() throws IOException { + LinuxContainerExecutor mockLCE = new MockLinuxContainerExecutor(); + CustomCgroupsLCEResourceHandler handler = + new CustomCgroupsLCEResourceHandler(); + YarnConfiguration conf = new YarnConfiguration(); + final int numProcessors = 4; + ResourceCalculatorPlugin plugin = + Mockito.mock(ResourceCalculatorPlugin.class); + Mockito.doReturn(numProcessors).when(plugin).getNumProcessors(); + handler.setConf(conf); + handler.initConfig(); + + // create mock cgroup + File cgroupDir = new File("target", UUID.randomUUID().toString()); + if (!cgroupDir.mkdir()) { + String message = "Could not create dir " + cgroupDir.getAbsolutePath(); + throw new IOException(message); + } + File cgroupMountDir = new File(cgroupDir.getAbsolutePath(), "hadoop-yarn"); + if (!cgroupMountDir.mkdir()) { + String message = + "Could not create dir " + cgroupMountDir.getAbsolutePath(); + throw new IOException(message); + } + + // create mock mtab + String mtabContent = + "none " + cgroupDir.getAbsolutePath() + " cgroup rw,relatime,cpu 0 0"; + File mockMtab = new File("target", UUID.randomUUID().toString()); + if (!mockMtab.exists()) { + if (!mockMtab.createNewFile()) { + String message = "Could not create file " + mockMtab.getAbsolutePath(); + throw new IOException(message); + } + } + FileWriter mtabWriter = new FileWriter(mockMtab.getAbsoluteFile()); + 
mtabWriter.write(mtabContent); + mtabWriter.close(); + mockMtab.deleteOnExit(); + + // setup our handler and call init() + handler.setMtabFile(mockMtab.getAbsolutePath()); + + // check values + // in this case, we're using all cpu so the files + // shouldn't exist(because init won't create them + handler.init(mockLCE, plugin); + File periodFile = new File(cgroupMountDir, "cpu.cfs_period_us"); + File quotaFile = new File(cgroupMountDir, "cpu.cfs_quota_us"); + Assert.assertFalse(periodFile.exists()); + Assert.assertFalse(quotaFile.exists()); + + // subset of cpu being used, files should be created + conf.setInt(YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT, 75); + handler.limits[0] = 100 * 1000; + handler.limits[1] = 1000 * 1000; + handler.init(mockLCE, plugin); + int period = readIntFromFile(periodFile); + int quota = readIntFromFile(quotaFile); + Assert.assertEquals(100 * 1000, period); + Assert.assertEquals(1000 * 1000, quota); + + // set cpu back to 100, quota should be -1 + conf.setInt(YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT, 100); + handler.limits[0] = 100 * 1000; + handler.limits[1] = 1000 * 1000; + handler.init(mockLCE, plugin); + quota = readIntFromFile(quotaFile); + Assert.assertEquals(-1, quota); + + FileUtils.deleteQuietly(cgroupDir); + } + + private int readIntFromFile(File targetFile) throws IOException { + Scanner scanner = new Scanner(targetFile); + if (scanner.hasNextInt()) { + return scanner.nextInt(); + } + return -1; + } + + @Test + public void testGetOverallLimits() { + + int expectedQuota = 1000 * 1000; + CgroupsLCEResourcesHandler handler = new CgroupsLCEResourcesHandler(); + + int[] ret = handler.getOverallLimits(2); + Assert.assertEquals(expectedQuota / 2, ret[0]); + Assert.assertEquals(expectedQuota, ret[1]); + + ret = handler.getOverallLimits(2000); + Assert.assertEquals(expectedQuota, ret[0]); + Assert.assertEquals(-1, ret[1]); + + int[] params = { 0, -1 }; + for (int cores : params) { + try { + handler.getOverallLimits(cores); + Assert.fail("Function call should throw error."); + } catch (IllegalArgumentException ie) { + // expected + } + } + + // test minimums + ret = handler.getOverallLimits(1000 * 1000); + Assert.assertEquals(1000 * 1000, ret[0]); + Assert.assertEquals(-1, ret[1]); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java new file mode 100644 index 0000000000..e1af9483a8 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.util; + +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; +import org.junit.Assert; +import org.junit.Test; +import org.mockito.Mockito; + +public class TestNodeManagerHardwareUtils { + + @Test + public void testGetContainerCores() { + + YarnConfiguration conf = new YarnConfiguration(); + float ret; + final int numProcessors = 4; + ResourceCalculatorPlugin plugin = + Mockito.mock(ResourceCalculatorPlugin.class); + Mockito.doReturn(numProcessors).when(plugin).getNumProcessors(); + + conf.setInt(YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT, 0); + try { + NodeManagerHardwareUtils.getContainersCores(plugin, conf); + Assert.fail("getContainerCores should have thrown exception"); + } catch (IllegalArgumentException ie) { + // expected + } + + conf.setInt(YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT, + 100); + ret = NodeManagerHardwareUtils.getContainersCores(plugin, conf); + Assert.assertEquals(4, (int) ret); + + conf + .setInt(YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT, 50); + ret = NodeManagerHardwareUtils.getContainersCores(plugin, conf); + Assert.assertEquals(2, (int) ret); + + conf + .setInt(YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT, 75); + ret = NodeManagerHardwareUtils.getContainersCores(plugin, conf); + Assert.assertEquals(3, (int) ret); + + conf + .setInt(YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT, 85); + ret = NodeManagerHardwareUtils.getContainersCores(plugin, conf); + Assert.assertEquals(3.4, ret, 0.1); + + conf.setInt(YarnConfiguration.NM_RESOURCE_PERCENTAGE_PHYSICAL_CPU_LIMIT, + 110); + ret = NodeManagerHardwareUtils.getContainersCores(plugin, conf); + Assert.assertEquals(4, (int) ret); + } +}
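The memorySeconds and vcoreSeconds fields added to the ResourceManager REST responses above are charge-back aggregates: megabyte-seconds and virtual-core-seconds summed over an application's containers for the time they were allocated. A minimal sketch of that accumulation follows; the Allocation class and aggregate() helper are hypothetical stand-ins for the running totals the ResourceManager keeps per application attempt, not RM internals.

import java.util.ArrayList;
import java.util.List;

// Illustrative accumulation of megabyte-seconds and virtual-core-seconds for one app.
public class UsageAggregationSketch {

  // One container allocation: resources held for a span of wall-clock time.
  static class Allocation {
    final int memoryMB;
    final int vcores;
    final long startMillis;
    final long endMillis;

    Allocation(int memoryMB, int vcores, long startMillis, long endMillis) {
      this.memoryMB = memoryMB;
      this.vcores = vcores;
      this.startMillis = startMillis;
      this.endMillis = endMillis;
    }
  }

  // Returns {memorySeconds, vcoreSeconds}.
  static long[] aggregate(List<Allocation> allocations) {
    long memorySeconds = 0;
    long vcoreSeconds = 0;
    for (Allocation a : allocations) {
      long seconds = Math.max(0, (a.endMillis - a.startMillis) / 1000);
      memorySeconds += (long) a.memoryMB * seconds; // megabyte-seconds
      vcoreSeconds += (long) a.vcores * seconds;    // virtual-core-seconds
    }
    return new long[] { memorySeconds, vcoreSeconds };
  }

  public static void main(String[] args) {
    List<Allocation> allocations = new ArrayList<Allocation>();
    // A 1024 MB / 1 vcore AM container that ran for 100 seconds.
    allocations.add(new Allocation(1024, 1, 0L, 100000L));
    long[] totals = aggregate(allocations);
    System.out.println("memorySeconds=" + totals[0] + ", vcoreSeconds=" + totals[1]);
  }
}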
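As a usage note, the new per-application fields can be read back from a running cluster through the ResourceManager web services documented in ResourceManagerRest.apt.vm. In the rough sketch below, the RM host/port and the /ws/v1/cluster/apps path are deployment assumptions, and the regex extraction is a deliberately naive stand-in for real JSON parsing.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Fetch the cluster apps listing from the RM web services and print the
// memorySeconds values found in the response body.
public class AppUsageFetchSketch {
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://rm.example.com:8088/ws/v1/cluster/apps");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestProperty("Accept", "application/json");

    StringBuilder body = new StringBuilder();
    BufferedReader in =
        new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"));
    try {
      String line;
      while ((line = in.readLine()) != null) {
        body.append(line);
      }
    } finally {
      in.close();
    }

    // Naive extraction of the aggregate field added by this patch set.
    Matcher m = Pattern.compile("\"memorySeconds\"\\s*:\\s*(\\d+)").matcher(body);
    while (m.find()) {
      System.out.println("memorySeconds=" + m.group(1));
    }
  }
}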