From 60af8793b45b4057101a22e4248d7ca022b52d79 Mon Sep 17 00:00:00 2001 From: Abhishek Modi Date: Mon, 9 Sep 2019 14:26:00 +0530 Subject: [PATCH] YARN-9821. NM hangs at serviceStop when ATSV2 Backend Hbase is Down. Contributed by Prabhu Joseph. --- .../storage/HBaseTimelineWriterImpl.java | 74 +++++++++++-------- 1 file changed, 42 insertions(+), 32 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/HBaseTimelineWriterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/HBaseTimelineWriterImpl.java index a398febccc..dda004d7ec 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/HBaseTimelineWriterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/HBaseTimelineWriterImpl.java @@ -630,39 +630,49 @@ public void flush() throws IOException { */ @Override protected void serviceStop() throws Exception { - if (entityTable != null) { - LOG.info("closing the entity table"); - // The close API performs flushing and releases any resources held - entityTable.close(); + boolean isStorageUp = true; + try { + storageMonitor.checkStorageIsUp(); + } catch (IOException e) { + LOG.warn("Failed to close the timeline tables as Hbase is down", e); + isStorageUp = false; } - if (appToFlowTable != null) { - LOG.info("closing the app_flow table"); - // The close API performs flushing and releases any resources held - appToFlowTable.close(); - } - if (applicationTable != null) { - LOG.info("closing the application table"); - applicationTable.close(); - } - if (flowRunTable != null) { - LOG.info("closing the flow run table"); - // The close API performs flushing and releases any resources held - flowRunTable.close(); - } - if (flowActivityTable != null) { - LOG.info("closing the flowActivityTable table"); - // The close API performs flushing and releases any resources held - flowActivityTable.close(); - } - if (subApplicationTable != null) { - subApplicationTable.close(); - } - if (domainTable != null) { - domainTable.close(); - } - if (conn != null) { - LOG.info("closing the hbase Connection"); - conn.close(); + + if (isStorageUp) { + if (entityTable != null) { + LOG.info("closing the entity table"); + // The close API performs flushing and releases any resources held + entityTable.close(); + } + if (appToFlowTable != null) { + LOG.info("closing the app_flow table"); + // The close API performs flushing and releases any resources held + appToFlowTable.close(); + } + if (applicationTable != null) { + LOG.info("closing the application table"); + applicationTable.close(); + } + if (flowRunTable != null) { + LOG.info("closing the flow run table"); + // The close API performs flushing and releases any resources held + flowRunTable.close(); + } + if (flowActivityTable != null) { + LOG.info("closing the flowActivityTable table"); + // The close API performs flushing and releases any resources held + flowActivityTable.close(); + } + if (subApplicationTable != null) { + subApplicationTable.close(); + } + if (domainTable != null) { + domainTable.close(); + } + if (conn != null) { + LOG.info("closing the hbase Connection"); + conn.close(); + } } storageMonitor.stop(); super.serviceStop();