YARN-10789. RM HA startup can fail due to race conditions in ZKConfigurationStore. Contributed by Tarun Parimi
This commit is contained in:
parent
797c595ba5
commit
a272adc5fa
@ -19,6 +19,7 @@
|
|||||||
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.conf;
|
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.conf;
|
||||||
|
|
||||||
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
|
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
|
||||||
|
import org.apache.zookeeper.KeeperException.NodeExistsException;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
@ -61,7 +62,8 @@ public class ZKConfigurationStore extends YarnConfigurationStore {
|
|||||||
private static final String CONF_STORE_PATH = "CONF_STORE";
|
private static final String CONF_STORE_PATH = "CONF_STORE";
|
||||||
private static final String FENCING_PATH = "FENCING";
|
private static final String FENCING_PATH = "FENCING";
|
||||||
private static final String CONF_VERSION_PATH = "CONF_VERSION";
|
private static final String CONF_VERSION_PATH = "CONF_VERSION";
|
||||||
|
private static final String NODEEXISTS_MSG = "Encountered NodeExists error."
|
||||||
|
+ " Skipping znode creation since another RM has already created it";
|
||||||
private String zkVersionPath;
|
private String zkVersionPath;
|
||||||
private String logsPath;
|
private String logsPath;
|
||||||
private String confStorePath;
|
private String confStorePath;
|
||||||
@ -92,7 +94,11 @@ public void initialize(Configuration config, Configuration schedConf,
|
|||||||
this.fencingNodePath = getNodePath(znodeParentPath, FENCING_PATH);
|
this.fencingNodePath = getNodePath(znodeParentPath, FENCING_PATH);
|
||||||
this.confVersionPath = getNodePath(znodeParentPath, CONF_VERSION_PATH);
|
this.confVersionPath = getNodePath(znodeParentPath, CONF_VERSION_PATH);
|
||||||
|
|
||||||
|
try {
|
||||||
zkManager.createRootDirRecursively(znodeParentPath, zkAcl);
|
zkManager.createRootDirRecursively(znodeParentPath, zkAcl);
|
||||||
|
} catch(NodeExistsException e) {
|
||||||
|
LOG.warn(NODEEXISTS_MSG, e);
|
||||||
|
}
|
||||||
zkManager.delete(fencingNodePath);
|
zkManager.delete(fencingNodePath);
|
||||||
|
|
||||||
if (createNewZkPath(logsPath)) {
|
if (createNewZkPath(logsPath)) {
|
||||||
@ -244,7 +250,12 @@ public List<LogMutation> getConfirmedConfHistory(long fromId) {
|
|||||||
*/
|
*/
|
||||||
private boolean createNewZkPath(String path) throws Exception {
|
private boolean createNewZkPath(String path) throws Exception {
|
||||||
if (!zkManager.exists(path)) {
|
if (!zkManager.exists(path)) {
|
||||||
|
try {
|
||||||
zkManager.create(path);
|
zkManager.create(path);
|
||||||
|
} catch(NodeExistsException e) {
|
||||||
|
LOG.warn(NODEEXISTS_MSG, e);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
return false;
|
return false;
|
||||||
@ -279,8 +290,12 @@ private void safeSetZkData(String path, Object data) throws Exception {
|
|||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
protected void safeCreateZkData(String path, byte[] data) throws Exception {
|
protected void safeCreateZkData(String path, byte[] data) throws Exception {
|
||||||
|
try {
|
||||||
zkManager.safeCreate(path, data, zkAcl, CreateMode.PERSISTENT,
|
zkManager.safeCreate(path, data, zkAcl, CreateMode.PERSISTENT,
|
||||||
zkAcl, fencingNodePath);
|
zkAcl, fencingNodePath);
|
||||||
|
} catch(NodeExistsException e) {
|
||||||
|
LOG.warn(NODEEXISTS_MSG, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String getNodePath(String root, String nodeName) {
|
private static String getNodePath(String root, String nodeName) {
|
||||||
|
Loading…
Reference in New Issue
Block a user