YARN-11463. Node Labels root directory creation doesn't have a retry logic (#5562)
Co-authored-by: Ashutosh Gupta <ashugpt@amazon.com>
This commit is contained in:
parent
9e24ed2196
commit
964c1902c8
@ -217,6 +217,16 @@ public class YarnConfiguration extends Configuration {
|
||||
|
||||
public static final int DEFAULT_RM_APPLICATION_MAX_TAG_LENGTH = 100;
|
||||
|
||||
public static final String NODE_STORE_ROOT_DIR_NUM_RETRIES =
|
||||
RM_PREFIX + "nodestore-rootdir.num-retries";
|
||||
|
||||
public static final int NODE_STORE_ROOT_DIR_NUM_DEFAULT_RETRIES = 1000;
|
||||
|
||||
public static final String NODE_STORE_ROOT_DIR_RETRY_INTERVAL =
|
||||
RM_PREFIX + "nodestore-rootdir.retry-interval-ms";
|
||||
|
||||
public static final int NODE_STORE_ROOT_DIR_RETRY_DEFAULT_INTERVAL = 1000;
|
||||
|
||||
public static final String RM_APPLICATION_MASTER_SERVICE_PROCESSORS =
|
||||
RM_PREFIX + "application-master-service.processors";
|
||||
|
||||
|
@ -65,8 +65,32 @@ public abstract class AbstractFSNodeStore<M> {
|
||||
this.fsWorkingPath = fsStorePath;
|
||||
this.manager = mgr;
|
||||
initFileSystem(conf);
|
||||
// mkdir of root dir path
|
||||
fs.mkdirs(fsWorkingPath);
|
||||
// mkdir of root dir path with retry logic
|
||||
int maxRetries = conf.getInt(YarnConfiguration.NODE_STORE_ROOT_DIR_NUM_RETRIES,
|
||||
YarnConfiguration.NODE_STORE_ROOT_DIR_NUM_DEFAULT_RETRIES);
|
||||
int retryCount = 0;
|
||||
boolean success = fs.mkdirs(fsWorkingPath);
|
||||
|
||||
while (!success && retryCount < maxRetries) {
|
||||
try {
|
||||
if (!fs.exists(fsWorkingPath)) {
|
||||
success = fs.mkdirs(fsWorkingPath);
|
||||
} else {
|
||||
success = true;
|
||||
}
|
||||
} catch (IOException e) {
|
||||
retryCount++;
|
||||
if (retryCount >= maxRetries) {
|
||||
throw e;
|
||||
}
|
||||
try {
|
||||
Thread.sleep(conf.getInt(YarnConfiguration.NODE_STORE_ROOT_DIR_RETRY_INTERVAL,
|
||||
YarnConfiguration.NODE_STORE_ROOT_DIR_RETRY_DEFAULT_INTERVAL));
|
||||
} catch (InterruptedException ie) {
|
||||
throw new RuntimeException(ie);
|
||||
}
|
||||
}
|
||||
}
|
||||
this.replication = conf.getInt(YarnConfiguration.FS_STORE_FILE_REPLICATION,
|
||||
YarnConfiguration.DEFAULT_FS_STORE_FILE_REPLICATION);
|
||||
LOG.info("Created store directory :" + fsWorkingPath);
|
||||
|
@ -5177,4 +5177,20 @@
|
||||
<value>1</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>
|
||||
Number of retries when creating the root directory for the node store.
|
||||
</description>
|
||||
<name>yarn.resourcemanager.nodestore-rootdir.num-retries</name>
|
||||
<value>1000</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<description>
|
||||
Interval in milliseconds between retries when creating the root directory for the node store.
|
||||
</description>
|
||||
<name>yarn.resourcemanager.nodestore-rootdir.retry-interval-ms</name>
|
||||
<value>1000</value>
|
||||
</property>
|
||||
|
||||
</configuration>
|
||||
|
@ -359,9 +359,6 @@ public class TestFileSystemNodeLabelsStore extends NodeLabelTestBase {
|
||||
|
||||
mockStore.setFs(mockFs);
|
||||
verifyMkdirsCount(mockStore, true, 1);
|
||||
verifyMkdirsCount(mockStore, false, 2);
|
||||
verifyMkdirsCount(mockStore, true, 3);
|
||||
verifyMkdirsCount(mockStore, false, 4);
|
||||
}
|
||||
|
||||
private void verifyMkdirsCount(FileSystemNodeLabelsStore store,
|
||||
|
Loading…
x
Reference in New Issue
Block a user