diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 70ea5fed36..59feb382c5 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -245,6 +245,9 @@ Release 0.23.0 - Unreleased MAPREDUCE-2774. Add startup message to ResourceManager & NodeManager on startup. (Venu Gopala Rao via acmurthy) + MAPREDUCE-2655. Add audit logs to ResourceManager and NodeManager. (Thomas + Graves via acmurthy) + OPTIMIZATIONS MAPREDUCE-2026. Make JobTracker.getJobCounters() and diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index 9248c3ee04..49bf3f0ef0 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -65,6 +65,8 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerManagerEvent; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; +import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger; +import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.nodemanager.NMConfig; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; @@ -266,6 +268,10 @@ public StartContainerResponse startContainer(StartContainerRequest request) ContainerId containerID = launchContext.getContainerId(); ApplicationId applicationID = containerID.getAppId(); if (context.getContainers().putIfAbsent(containerID, container) != null) { + NMAuditLogger.logFailure(launchContext.getUser(), + AuditConstants.START_CONTAINER, "ContainerManagerImpl", + "Container already running on this node!", + applicationID, containerID); throw RPCUtil.getRemoteException("Container " + containerID + " already is running on this node!!"); } @@ -281,6 +287,11 @@ public StartContainerResponse startContainer(StartContainerRequest request) // TODO: Validate the request dispatcher.getEventHandler().handle(new ApplicationInitEvent(container)); + + NMAuditLogger.logSuccess(launchContext.getUser(), + AuditConstants.START_CONTAINER, "ContainerManageImpl", + applicationID, containerID); + StartContainerResponse response = recordFactory.newRecordInstance(StartContainerResponse.class); response.addAllServiceResponse(auxiluaryServices.getMeta()); @@ -300,12 +311,23 @@ public StopContainerResponse stopContainer(StopContainerRequest request) Container container = this.context.getContainers().get(containerID); if (container == null) { LOG.warn("Trying to stop unknown container " + containerID); + NMAuditLogger.logFailure(container.getUser(), + AuditConstants.STOP_CONTAINER, "ContainerManagerImpl", + "Trying to stop unknown container!", + containerID.getAppId(), containerID); return response; // Return immediately. } dispatcher.getEventHandler().handle( new ContainerKillEvent(containerID, "Container killed by the ApplicationMaster.")); + // user logged here not ideal since just getting user from container but + // request doesn't have anything and should be coming from user of AM so + // should be the same or should be rejected by auth before here. + NMAuditLogger.logSuccess(container.getUser(), + AuditConstants.STOP_CONTAINER, "ContainerManageImpl", + containerID.getAppId(), containerID); + // TODO: Move this code to appropriate place once kill_container is // implemented. nodeStatusUpdater.sendOutofBandHeartBeat(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 660311c34b..8a4439d32b 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -42,6 +42,8 @@ import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger; +import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServicesEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationContainerFinishedEvent; @@ -365,18 +367,28 @@ private void finished() { case EXITED_WITH_SUCCESS: metrics.endRunningContainer(); metrics.completedContainer(); + NMAuditLogger.logSuccess(getUser(), + AuditConstants.FINISH_SUCCESS_CONTAINER, "ContainerImpl", + getContainerID().getAppId(), getContainerID()); break; case EXITED_WITH_FAILURE: metrics.endRunningContainer(); // fall through case LOCALIZATION_FAILED: metrics.failedContainer(); + NMAuditLogger.logFailure(getUser(), + AuditConstants.FINISH_FAILED_CONTAINER, "ContainerImpl", + "Container failed with state: " + getContainerState(), + getContainerID().getAppId(), getContainerID()); break; case CONTAINER_CLEANEDUP_AFTER_KILL: metrics.endRunningContainer(); // fall through case NEW: metrics.killedContainer(); + NMAuditLogger.logSuccess(getUser(), + AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl", + getContainerID().getAppId(), getContainerID()); } metrics.releaseContainer(getLaunchContext().getResource()); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java index 061f4ee5ab..01508a2142 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java @@ -51,6 +51,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.api.protocolrecords.RefreshUserToGroupsMappingsRequest; import org.apache.hadoop.yarn.server.resourcemanager.api.protocolrecords.RefreshUserToGroupsMappingsResponse; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.service.AbstractService; public class AdminService extends AbstractService implements RMAdminProtocol { @@ -113,40 +114,54 @@ public void stop() { super.stop(); } - private void checkAcls(String method) throws YarnRemoteException { + private UserGroupInformation checkAcls(String method) throws YarnRemoteException { + UserGroupInformation user; try { - UserGroupInformation user = UserGroupInformation.getCurrentUser(); - if (!adminAcl.isUserAllowed(user)) { - LOG.warn("User " + user.getShortUserName() + " doesn't have permission" + - " to call '" + method + "'"); - - throw RPCUtil.getRemoteException( - new AccessControlException("User " + user.getShortUserName() + - " doesn't have permission" + - " to call '" + method + "'") - ); - } - - LOG.info("RM Admin: " + method + " invoked by user " + - user.getShortUserName()); - + user = UserGroupInformation.getCurrentUser(); } catch (IOException ioe) { LOG.warn("Couldn't get current user", ioe); + + RMAuditLogger.logFailure("UNKNOWN", method, + adminAcl.toString(), "AdminService", + "Couldn't get current user"); throw RPCUtil.getRemoteException(ioe); } + + if (!adminAcl.isUserAllowed(user)) { + LOG.warn("User " + user.getShortUserName() + " doesn't have permission" + + " to call '" + method + "'"); + + RMAuditLogger.logFailure(user.getShortUserName(), method, + adminAcl.toString(), "AdminService", + AuditConstants.UNAUTHORIZED_USER); + + throw RPCUtil.getRemoteException( + new AccessControlException("User " + user.getShortUserName() + + " doesn't have permission" + + " to call '" + method + "'") + ); + } + LOG.info("RM Admin: " + method + " invoked by user " + + user.getShortUserName()); + + return user; } @Override public RefreshQueuesResponse refreshQueues(RefreshQueuesRequest request) throws YarnRemoteException { - checkAcls("refreshQueues"); - + UserGroupInformation user = checkAcls("refreshQueues"); try { scheduler.reinitialize(conf, null, null); // ContainerTokenSecretManager can't // be 'refreshed' + RMAuditLogger.logSuccess(user.getShortUserName(), "refreshQueues", + "AdminService"); return recordFactory.newRecordInstance(RefreshQueuesResponse.class); } catch (IOException ioe) { LOG.info("Exception refreshing queues ", ioe); + RMAuditLogger.logFailure(user.getShortUserName(), "refreshQueues", + adminAcl.toString(), "AdminService", + "Exception refreshing queues"); throw RPCUtil.getRemoteException(ioe); } } @@ -154,12 +169,17 @@ public RefreshQueuesResponse refreshQueues(RefreshQueuesRequest request) @Override public RefreshNodesResponse refreshNodes(RefreshNodesRequest request) throws YarnRemoteException { - checkAcls("refreshNodes"); + UserGroupInformation user = checkAcls("refreshNodes"); try { this.nodesListManager.refreshNodes(); + RMAuditLogger.logSuccess(user.getShortUserName(), "refreshNodes", + "AdminService"); return recordFactory.newRecordInstance(RefreshNodesResponse.class); } catch (IOException ioe) { LOG.info("Exception refreshing nodes ", ioe); + RMAuditLogger.logFailure(user.getShortUserName(), "refreshNodes", + adminAcl.toString(), "AdminService", + "Exception refreshing nodes"); throw RPCUtil.getRemoteException(ioe); } } @@ -168,9 +188,11 @@ public RefreshNodesResponse refreshNodes(RefreshNodesRequest request) public RefreshSuperUserGroupsConfigurationResponse refreshSuperUserGroupsConfiguration( RefreshSuperUserGroupsConfigurationRequest request) throws YarnRemoteException { - checkAcls("refreshSuperUserGroupsConfiguration"); + UserGroupInformation user = checkAcls("refreshSuperUserGroupsConfiguration"); ProxyUsers.refreshSuperUserGroupsConfiguration(new Configuration()); + RMAuditLogger.logSuccess(user.getShortUserName(), + "refreshSuperUserGroupsConfiguration", "AdminService"); return recordFactory.newRecordInstance( RefreshSuperUserGroupsConfigurationResponse.class); @@ -179,9 +201,11 @@ public RefreshSuperUserGroupsConfigurationResponse refreshSuperUserGroupsConfigu @Override public RefreshUserToGroupsMappingsResponse refreshUserToGroupsMappings( RefreshUserToGroupsMappingsRequest request) throws YarnRemoteException { - checkAcls("refreshUserToGroupsMappings"); + UserGroupInformation user = checkAcls("refreshUserToGroupsMappings"); Groups.getUserToGroupsMappingService().refresh(); + RMAuditLogger.logSuccess(user.getShortUserName(), + "refreshUserToGroupsMappings", "AdminService"); return recordFactory.newRecordInstance( RefreshUserToGroupsMappingsResponse.class); @@ -190,12 +214,14 @@ public RefreshUserToGroupsMappingsResponse refreshUserToGroupsMappings( @Override public RefreshAdminAclsResponse refreshAdminAcls( RefreshAdminAclsRequest request) throws YarnRemoteException { - checkAcls("refreshAdminAcls"); + UserGroupInformation user = checkAcls("refreshAdminAcls"); Configuration conf = new Configuration(); adminAcl = new AccessControlList( conf.get(RMConfig.RM_ADMIN_ACL, RMConfig.DEFAULT_RM_ADMIN_ACL)); + RMAuditLogger.logSuccess(user.getShortUserName(), "refreshAdminAcls", + "AdminService"); return recordFactory.newRecordInstance(RefreshAdminAclsResponse.class); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index fe0c8b2300..5f6f7d8b4c 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; import org.apache.hadoop.yarn.api.records.AMResponse; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ResourceRequest; @@ -49,6 +50,7 @@ import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.security.ApplicationTokenSecretManager; import org.apache.hadoop.yarn.security.SchedulerSecurityInfo; +import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; @@ -115,11 +117,16 @@ public RegisterApplicationMasterResponse registerApplicationMaster( ApplicationAttemptId applicationAttemptId = request .getApplicationAttemptId(); + ApplicationId appID = applicationAttemptId.getApplicationId(); AMResponse lastResponse = responseMap.get(applicationAttemptId); if (lastResponse == null) { String message = "Application doesn't exist in cache " + applicationAttemptId; LOG.error(message); + RMAuditLogger.logFailure(this.rmContext.getRMApps().get(appID).getUser(), + AuditConstants.REGISTER_AM, message, "ApplicationMasterService", + "Error in registering application master", appID, + applicationAttemptId); throw RPCUtil.getRemoteException(message); } @@ -133,6 +140,10 @@ public RegisterApplicationMasterResponse registerApplicationMaster( new RMAppAttemptRegistrationEvent(applicationAttemptId, request .getHost(), request.getRpcPort(), request.getTrackingUrl())); + RMAuditLogger.logSuccess(this.rmContext.getRMApps().get(appID).getUser(), + AuditConstants.REGISTER_AM, "ApplicationMasterService", appID, + applicationAttemptId); + // Pick up min/max resource from scheduler... RegisterApplicationMasterResponse response = recordFactory .newRecordInstance(RegisterApplicationMasterResponse.class); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index 83878c0cd7..dc6f6a796a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -70,6 +70,7 @@ import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.security.client.ClientRMSecurityInfo; +import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; @@ -195,9 +196,11 @@ public SubmitApplicationResponse submitApplication( SubmitApplicationRequest request) throws YarnRemoteException { ApplicationSubmissionContext submissionContext = request .getApplicationSubmissionContext(); + ApplicationId applicationId = null; + String user = null; try { - String user = UserGroupInformation.getCurrentUser().getShortUserName(); - ApplicationId applicationId = submissionContext.getApplicationId(); + user = UserGroupInformation.getCurrentUser().getShortUserName(); + applicationId = submissionContext.getApplicationId(); if (rmContext.getRMApps().get(applicationId) != null) { throw new IOException("Application with id " + applicationId + " is already present! Cannot add a duplicate!"); @@ -207,8 +210,13 @@ public SubmitApplicationResponse submitApplication( LOG.info("Application with id " + applicationId.getId() + " submitted by user " + user + " with " + submissionContext); + RMAuditLogger.logSuccess(user, AuditConstants.SUBMIT_APP_REQUEST, + "ClientRMService", applicationId); } catch (IOException ie) { LOG.info("Exception in submitting application", ie); + RMAuditLogger.logFailure(user, AuditConstants.SUBMIT_APP_REQUEST, + ie.getMessage(), "ClientRMService", + "Exception in submitting application", applicationId); throw RPCUtil.getRemoteException(ie); } @@ -228,6 +236,9 @@ public FinishApplicationResponse finishApplication( callerUGI = UserGroupInformation.getCurrentUser(); } catch (IOException ie) { LOG.info("Error getting UGI ", ie); + RMAuditLogger.logFailure("UNKNOWN", AuditConstants.KILL_APP_REQUEST, + "UNKNOWN", "ClientRMService" , "Error getting UGI", + applicationId); throw RPCUtil.getRemoteException(ie); } @@ -235,6 +246,10 @@ public FinishApplicationResponse finishApplication( // TODO: What if null if (!checkAccess(callerUGI, application.getUser(), ApplicationACL.MODIFY_APP)) { + RMAuditLogger.logFailure(callerUGI.getShortUserName(), + AuditConstants.KILL_APP_REQUEST, + "User doesn't have MODIFY_APP permissions", "ClientRMService", + AuditConstants.UNAUTHORIZED_USER, applicationId); throw RPCUtil.getRemoteException(new AccessControlException("User " + callerUGI.getShortUserName() + " cannot perform operation " + ApplicationACL.MODIFY_APP.name() + " on " + applicationId)); @@ -243,6 +258,8 @@ public FinishApplicationResponse finishApplication( this.rmContext.getDispatcher().getEventHandler().handle( new RMAppEvent(applicationId, RMAppEventType.KILL)); + RMAuditLogger.logSuccess(callerUGI.getShortUserName(), + AuditConstants.KILL_APP_REQUEST, "ClientRMService" , applicationId); FinishApplicationResponse response = recordFactory .newRecordInstance(FinishApplicationResponse.class); return response; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index 5ce9c015b2..0324908f2a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -32,6 +32,8 @@ import org.apache.hadoop.yarn.security.ApplicationTokenIdentifier; import org.apache.hadoop.yarn.security.ApplicationTokenSecretManager; import org.apache.hadoop.yarn.security.client.ClientToAMSecretManager; +import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger; +import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; @@ -165,8 +167,39 @@ protected synchronized void addCompletedApp(ApplicationId appId) { LOG.error("RMAppManager received completed appId of null, skipping"); } else { completedApps.add(appId); + writeAuditLog(appId); } - }; + } + + protected void writeAuditLog(ApplicationId appId) { + RMApp app = rmContext.getRMApps().get(appId); + String operation = "UNKONWN"; + boolean success = false; + switch (app.getState()) { + case FAILED: + operation = AuditConstants.FINISH_FAILED_APP; + break; + case FINISHED: + operation = AuditConstants.FINISH_SUCCESS_APP; + success = true; + break; + case KILLED: + operation = AuditConstants.FINISH_KILLED_APP; + success = true; + break; + default: + } + + if (success) { + RMAuditLogger.logSuccess(app.getUser(), operation, + "RMAppManager", app.getApplicationId()); + } else { + StringBuilder diag = app.getDiagnostics(); + String msg = diag == null ? null : diag.toString(); + RMAuditLogger.logFailure(app.getUser(), operation, msg, "RMAppManager", + "App failed with state: " + app.getState(), appId); + } + } /* * check to see if hit the limit for max # completed apps kept diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java index 024f9eb491..13101fc95e 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java @@ -39,6 +39,8 @@ import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger; +import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; @@ -182,6 +184,10 @@ synchronized public void containerCompleted(RMContainer rmContainer, // Remove from the list of containers liveContainers.remove(rmContainer.getContainerId()); + + RMAuditLogger.logSuccess(getUser(), + AuditConstants.RELEASE_CONTAINER, "SchedulerApp", + getApplicationId(), containerId); // Update usage metrics Resource containerResource = rmContainer.getContainer().getResource(); @@ -217,6 +223,9 @@ synchronized public RMContainer allocate(NodeType type, SchedulerNode node, + " container=" + container.getId() + " host=" + container.getNodeId().getHost() + " type=" + type); } + RMAuditLogger.logSuccess(getUser(), + AuditConstants.ALLOC_CONTAINER, "SchedulerApp", + getApplicationId(), container.getId()); // Add it to allContainers list. newlyAllocatedContainers.add(rmContainer); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 37e3bb5780..21ae4ac666 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -46,6 +46,8 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger; +import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; @@ -433,8 +435,15 @@ public Allocation allocate(ApplicationAttemptId applicationAttemptId, // Release containers for (ContainerId releasedContainerId : release) { - completedContainer(getRMContainer(releasedContainerId), - RMContainerEventType.RELEASED); + RMContainer rmContainer = getRMContainer(releasedContainerId); + if (rmContainer == null) { + RMAuditLogger.logFailure(application.getUser(), + AuditConstants.RELEASE_CONTAINER, + "Unauthorized access or invalid container", "CapacityScheduler", + "Trying to release container not owned by app or with invalid id", + application.getApplicationId(), releasedContainerId); + } + completedContainer(rmContainer, RMContainerEventType.RELEASED); } synchronized (application) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index be6c9048a2..06aea2c9a4 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -55,6 +55,8 @@ import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; +import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger; +import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store.RMState; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; @@ -225,8 +227,15 @@ public Allocation allocate( // Release containers for (ContainerId releasedContainer : release) { - containerCompleted(getRMContainer(releasedContainer), - RMContainerEventType.RELEASED); + RMContainer rmContainer = getRMContainer(releasedContainer); + if (rmContainer == null) { + RMAuditLogger.logFailure(application.getUser(), + AuditConstants.RELEASE_CONTAINER, + "Unauthorized access or invalid container", "FifoScheduler", + "Trying to release container not owned by app or with invalid id", + application.getApplicationId(), releasedContainer); + } + containerCompleted(rmContainer, RMContainerEventType.RELEASED); } if (!ask.isEmpty()) { @@ -642,6 +651,11 @@ private void containerLaunchedOnNode(Container container, SchedulerNode node) { @Lock(FifoScheduler.class) private synchronized void containerCompleted(RMContainer rmContainer, RMContainerEventType event) { + if (rmContainer == null) { + LOG.info("Null container completed..."); + return; + } + // Get the application for the finished container Container container = rmContainer.getContainer(); ApplicationAttemptId applicationAttemptId = container.getId().getAppAttemptId(); @@ -725,7 +739,7 @@ public synchronized SchedulerNodeReport getNodeReport(NodeId nodeId) { private RMContainer getRMContainer(ContainerId containerId) { SchedulerApp application = getApplication(containerId.getAppAttemptId()); - return application.getRMContainer(containerId); + return (application == null) ? null : application.getRMContainer(containerId); } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java index f8f34ec592..6df30629ea 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java @@ -51,6 +51,11 @@ public MockRMApp(int newid, long time, RMAppState newState, String userName) { user = userName; } + public MockRMApp(int newid, long time, RMAppState newState, String userName, String diag) { + this(newid, time, newState, userName); + this.diagnostics = new StringBuilder(diag); + } + @Override public ApplicationId getApplicationId() { return id;