diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 6d77eb492d..a3faec7171 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -217,6 +217,16 @@ private static void addDeprecatedKeys() { public static final int DEFAULT_RM_APPLICATION_MAX_TAG_LENGTH = 100; + public static final String NODE_STORE_ROOT_DIR_NUM_RETRIES = + RM_PREFIX + "nodestore-rootdir.num-retries"; + + public static final int NODE_STORE_ROOT_DIR_NUM_DEFAULT_RETRIES = 1000; + + public static final String NODE_STORE_ROOT_DIR_RETRY_INTERVAL = + RM_PREFIX + "nodestore-rootdir.retry-interval-ms"; + + public static final int NODE_STORE_ROOT_DIR_RETRY_DEFAULT_INTERVAL = 1000; + public static final String RM_APPLICATION_MASTER_SERVICE_PROCESSORS = RM_PREFIX + "application-master-service.processors"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java index 81514942af..a697be1951 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/store/AbstractFSNodeStore.java @@ -65,8 +65,36 @@ protected void initStore(Configuration conf, Path fsStorePath, this.fsWorkingPath = fsStorePath; this.manager = mgr; initFileSystem(conf); - // mkdir of root dir path - fs.mkdirs(fsWorkingPath); + // mkdir of root dir path with retry logic 
+ // Read both retry settings once, before the first attempt. + int maxRetries = conf.getInt(YarnConfiguration.NODE_STORE_ROOT_DIR_NUM_RETRIES, + YarnConfiguration.NODE_STORE_ROOT_DIR_NUM_DEFAULT_RETRIES); + int retryIntervalMs = conf.getInt(YarnConfiguration.NODE_STORE_ROOT_DIR_RETRY_INTERVAL, + YarnConfiguration.NODE_STORE_ROOT_DIR_RETRY_DEFAULT_INTERVAL); + boolean success = false; + for (int attempt = 0; !success; attempt++) { + IOException failure = null; + try { + // An already existing directory counts as success. + success = fs.mkdirs(fsWorkingPath) || fs.exists(fsWorkingPath); + } catch (IOException e) { + failure = e; + } + if (!success) { + // A false return is a failed attempt too: count it and back off instead of spinning. + if (attempt >= maxRetries) { + throw failure != null ? failure + : new IOException("Failed to create store directory " + fsWorkingPath); + } + try { + Thread.sleep(retryIntervalMs); + } catch (InterruptedException ie) { + // Restore the interrupt status before propagating. + Thread.currentThread().interrupt(); + throw new RuntimeException(ie); + } + } + } this.replication = conf.getInt(YarnConfiguration.FS_STORE_FILE_REPLICATION, YarnConfiguration.DEFAULT_FS_STORE_FILE_REPLICATION); LOG.info("Created store directory :" + fsWorkingPath); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index b9385d1c27..4fc414f0e0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -5177,4 +5177,20 @@ 1 + + + Number of Retries while trying to make root directory for node store. + + yarn.resourcemanager.nodestore-rootdir.num-retries + 1000 + + + + + Interval in ms between retries while trying to make root directory for node store. 
+ + yarn.resourcemanager.nodestore-rootdir.retry-interval-ms + 1000 + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java index 099684318f..a861b0654e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/nodelabels/TestFileSystemNodeLabelsStore.java @@ -359,9 +359,6 @@ public void initFileSystem(Configuration config) throws IOException { mockStore.setFs(mockFs); verifyMkdirsCount(mockStore, true, 1); - verifyMkdirsCount(mockStore, false, 2); - verifyMkdirsCount(mockStore, true, 3); - verifyMkdirsCount(mockStore, false, 4); } private void verifyMkdirsCount(FileSystemNodeLabelsStore store,