From 1cd55e0c171f7c4dec6f843931285557d59cd5ea Mon Sep 17 00:00:00 2001 From: Varun Vasudev Date: Tue, 2 Feb 2016 09:25:23 +0530 Subject: [PATCH] YARN-4649. Add additional logging to some NM state store operations. Contributed by Sidharta Seethana --- hadoop-yarn-project/CHANGES.txt | 3 ++ .../ContainerManagerImpl.java | 14 ++++++ .../recovery/NMLeveldbStateStoreService.java | 44 +++++++++++++++++++ .../recovery/NMStateStoreService.java | 11 +++++ 4 files changed, 72 insertions(+) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 90742db1ad..bf468647cb 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -115,6 +115,9 @@ Release 2.9.0 - UNRELEASED YARN-4647. Make RegisterNodeManagerRequestPBImpl thread-safe. (kasha) + YARN-4649. Add additional logging to some NM state store operations. + (Sidharta Seethana via vvasudev) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index d0663d5f20..7d51477f21 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -286,18 +286,32 @@ private void recover() throws IOException, URISyntaxException { RecoveredApplicationsState appsState = stateStore.loadApplicationsState(); for (ContainerManagerApplicationProto proto : appsState.getApplications()) { + if (LOG.isDebugEnabled()) { + 
LOG.debug("Recovering application with state: " + proto); + } recoverApplication(proto); } for (RecoveredContainerState rcs : stateStore.loadContainersState()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Recovering container with state: " + rcs); + } + recoverContainer(rcs); } String diagnostic = "Application marked finished during recovery"; for (ApplicationId appId : appsState.getFinishedApplications()) { + + if (LOG.isDebugEnabled()) { + LOG.debug("Application marked finished during recovery: " + appId); + } + dispatcher.getEventHandler().handle( new ApplicationFinishEvent(appId, diagnostic)); } + } else { + LOG.info("Not a recoverable state store. Nothing to recover."); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java index 89c71bb890..81d6c57de6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java @@ -248,6 +248,11 @@ private RecoveredContainerState loadContainerState(ContainerId containerId, @Override public void storeContainer(ContainerId containerId, StartContainerRequest startRequest) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("storeContainer: containerId=" + containerId + + ", startRequest=" + startRequest); + } + String key = CONTAINERS_KEY_PREFIX + containerId.toString() + CONTAINER_REQUEST_KEY_SUFFIX; try { @@ -261,6 +266,11 @@ public void storeContainer(ContainerId containerId, 
@Override public void storeContainerDiagnostics(ContainerId containerId, StringBuilder diagnostics) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("storeContainerDiagnostics: containerId=" + containerId + + ", diagnostics=" + diagnostics); + } + String key = CONTAINERS_KEY_PREFIX + containerId.toString() + CONTAINER_DIAGS_KEY_SUFFIX; try { @@ -273,6 +283,10 @@ public void storeContainerDiagnostics(ContainerId containerId, @Override public void storeContainerLaunched(ContainerId containerId) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("storeContainerLaunched: containerId=" + containerId); + } + String key = CONTAINERS_KEY_PREFIX + containerId.toString() + CONTAINER_LAUNCHED_KEY_SUFFIX; try { @@ -285,6 +299,11 @@ public void storeContainerLaunched(ContainerId containerId) @Override public void storeContainerResourceChanged(ContainerId containerId, Resource capability) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("storeContainerResourceChanged: containerId=" + containerId + + ", capability=" + capability); + } + String key = CONTAINERS_KEY_PREFIX + containerId.toString() + CONTAINER_RESOURCE_CHANGED_KEY_SUFFIX; try { @@ -299,6 +318,10 @@ public void storeContainerResourceChanged(ContainerId containerId, @Override public void storeContainerKilled(ContainerId containerId) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("storeContainerKilled: containerId=" + containerId); + } + String key = CONTAINERS_KEY_PREFIX + containerId.toString() + CONTAINER_KILLED_KEY_SUFFIX; try { @@ -311,6 +334,10 @@ public void storeContainerKilled(ContainerId containerId) @Override public void storeContainerCompleted(ContainerId containerId, int exitCode) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("storeContainerCompleted: containerId=" + containerId + ", exitCode=" + exitCode); + } + String key = CONTAINERS_KEY_PREFIX + containerId.toString() + CONTAINER_EXIT_CODE_KEY_SUFFIX; try { @@ -323,6 +350,10 @@ public void 
storeContainerCompleted(ContainerId containerId, @Override public void removeContainer(ContainerId containerId) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("removeContainer: containerId=" + containerId); + } + String keyPrefix = CONTAINERS_KEY_PREFIX + containerId.toString(); try { WriteBatch batch = db.createWriteBatch(); @@ -389,6 +420,11 @@ public RecoveredApplicationsState loadApplicationsState() @Override public void storeApplication(ApplicationId appId, ContainerManagerApplicationProto p) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("storeApplication: appId=" + appId + + ", proto=" + p); + } + String key = APPLICATIONS_KEY_PREFIX + appId; try { db.put(bytes(key), p.toByteArray()); @@ -400,6 +436,10 @@ public void storeApplication(ApplicationId appId, @Override public void storeFinishedApplication(ApplicationId appId) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("storeFinishedApplication: appId=" + appId); + } + String key = FINISHED_APPS_KEY_PREFIX + appId; try { db.put(bytes(key), new byte[0]); @@ -411,6 +451,10 @@ public void storeFinishedApplication(ApplicationId appId) @Override public void removeApplication(ApplicationId appId) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("removeApplication: appId=" + appId); + } + try { WriteBatch batch = db.createWriteBatch(); try { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java index e8ccf541cf..84c5aa982a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java @@ -100,6 +100,17 @@ public StartContainerRequest getStartRequest() { public Resource getCapability() { return capability; } + + @Override + public String toString() { + return new StringBuilder("Status: ").append(getStatus()) + .append(", Exit code: ").append(exitCode) + .append(", Killed: ").append(getKilled()) + .append(", Diagnostics: ").append(getDiagnostics()) + .append(", Capability: ").append(getCapability()) + .append(", StartRequest: ").append(getStartRequest()) + .toString(); + } } public static class LocalResourceTrackerState {