YARN-819. ResourceManager and NodeManager should check for a minimum allowed version (Robert Parker via jeagles)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1526660 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7e73b9cde6
commit
5a15c392a1
@ -36,6 +36,9 @@ Release 2.3.0 - UNRELEASED
|
|||||||
YARN-353. Add Zookeeper-based store implementation for RMStateStore.
|
YARN-353. Add Zookeeper-based store implementation for RMStateStore.
|
||||||
(Bikas Saha, Jian He and Karthik Kambatla via hitesh)
|
(Bikas Saha, Jian He and Karthik Kambatla via hitesh)
|
||||||
|
|
||||||
|
YARN-819. ResourceManager and NodeManager should check for a minimum allowed
|
||||||
|
version (Robert Parker via jeagles)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
@ -362,6 +362,13 @@ public class YarnConfiguration extends Configuration {
|
|||||||
|
|
||||||
public static final long DEFAULT_RM_NMTOKEN_MASTER_KEY_ROLLING_INTERVAL_SECS =
|
public static final long DEFAULT_RM_NMTOKEN_MASTER_KEY_ROLLING_INTERVAL_SECS =
|
||||||
24 * 60 * 60;
|
24 * 60 * 60;
|
||||||
|
|
||||||
|
public static final String RM_NODEMANAGER_MINIMUM_VERSION =
|
||||||
|
RM_PREFIX + "nodemanager.minimum.version";
|
||||||
|
|
||||||
|
public static final String DEFAULT_RM_NODEMANAGER_MINIMUM_VERSION =
|
||||||
|
"NONE";
|
||||||
|
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
// Node Manager Configs
|
// Node Manager Configs
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
@ -460,6 +467,10 @@ public class YarnConfiguration extends Configuration {
|
|||||||
public static final String NM_LOG_DIRS = NM_PREFIX + "log-dirs";
|
public static final String NM_LOG_DIRS = NM_PREFIX + "log-dirs";
|
||||||
public static final String DEFAULT_NM_LOG_DIRS = "/tmp/logs";
|
public static final String DEFAULT_NM_LOG_DIRS = "/tmp/logs";
|
||||||
|
|
||||||
|
public static final String NM_RESOURCEMANAGER_MINIMUM_VERSION =
|
||||||
|
NM_PREFIX + "resourcemanager.minimum.version";
|
||||||
|
public static final String DEFAULT_NM_RESOURCEMANAGER_MINIMUM_VERSION = "NONE";
|
||||||
|
|
||||||
/** Interval at which the delayed token removal thread runs */
|
/** Interval at which the delayed token removal thread runs */
|
||||||
public static final String RM_DELAYED_DELEGATION_TOKEN_REMOVAL_INTERVAL_MS =
|
public static final String RM_DELAYED_DELEGATION_TOKEN_REMOVAL_INTERVAL_MS =
|
||||||
RM_PREFIX + "delayed.delegation-token.removal-interval-ms";
|
RM_PREFIX + "delayed.delegation-token.removal-interval-ms";
|
||||||
|
@ -358,6 +358,14 @@
|
|||||||
<value>1000</value>
|
<value>1000</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>The minimum allowed version of a connecting nodemanager. The valid values are
|
||||||
|
NONE (no version checking), EqualToRM (the nodemanager's version is equal to
|
||||||
|
or greater than the RM version), or a Version String.</description>
|
||||||
|
<name>yarn.resourcemanager.nodemanager.minimum.version</name>
|
||||||
|
<value>NONE</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>Enable a set of periodic monitors (specified in
|
<description>Enable a set of periodic monitors (specified in
|
||||||
yarn.resourcemanager.scheduler.monitor.policies) that affect the
|
yarn.resourcemanager.scheduler.monitor.policies) that affect the
|
||||||
@ -737,6 +745,14 @@
|
|||||||
<value>30</value>
|
<value>30</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>The minimum allowed version of a resourcemanager that a nodemanager will connect to.
|
||||||
|
The valid values are NONE (no version checking), EqualToNM (the resourcemanager's version is
|
||||||
|
equal to or greater than the NM version), or a Version String.</description>
|
||||||
|
<name>yarn.nodemanager.resourcemanager.minimum.version</name>
|
||||||
|
<value>NONE</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>Max number of threads in NMClientAsync to process container
|
<description>Max number of threads in NMClientAsync to process container
|
||||||
management events</description>
|
management events</description>
|
||||||
|
@ -25,8 +25,10 @@ public interface RegisterNodeManagerRequest {
|
|||||||
NodeId getNodeId();
|
NodeId getNodeId();
|
||||||
int getHttpPort();
|
int getHttpPort();
|
||||||
Resource getResource();
|
Resource getResource();
|
||||||
|
String getNMVersion();
|
||||||
|
|
||||||
void setNodeId(NodeId nodeId);
|
void setNodeId(NodeId nodeId);
|
||||||
void setHttpPort(int port);
|
void setHttpPort(int port);
|
||||||
void setResource(Resource resource);
|
void setResource(Resource resource);
|
||||||
|
void setNMVersion(String version);
|
||||||
}
|
}
|
||||||
|
@ -42,4 +42,7 @@ public interface RegisterNodeManagerResponse {
|
|||||||
|
|
||||||
void setDiagnosticsMessage(String diagnosticsMessage);
|
void setDiagnosticsMessage(String diagnosticsMessage);
|
||||||
|
|
||||||
|
void setRMVersion(String version);
|
||||||
|
|
||||||
|
String getRMVersion();
|
||||||
}
|
}
|
||||||
|
@ -139,6 +139,21 @@ public void setHttpPort(int httpPort) {
|
|||||||
builder.setHttpPort(httpPort);
|
builder.setHttpPort(httpPort);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getNMVersion() {
|
||||||
|
RegisterNodeManagerRequestProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
|
if (!p.hasNmVersion()) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
return (p.getNmVersion());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setNMVersion(String version) {
|
||||||
|
maybeInitBuilder();
|
||||||
|
builder.setNmVersion(version);
|
||||||
|
}
|
||||||
|
|
||||||
private NodeIdPBImpl convertFromProtoFormat(NodeIdProto p) {
|
private NodeIdPBImpl convertFromProtoFormat(NodeIdProto p) {
|
||||||
return new NodeIdPBImpl(p);
|
return new NodeIdPBImpl(p);
|
||||||
}
|
}
|
||||||
|
@ -150,6 +150,25 @@ public void setDiagnosticsMessage(String diagnosticsMessage) {
|
|||||||
builder.setDiagnosticsMessage((diagnosticsMessage));
|
builder.setDiagnosticsMessage((diagnosticsMessage));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getRMVersion() {
|
||||||
|
RegisterNodeManagerResponseProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
|
if (!p.hasRmVersion()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return p.getRmVersion();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setRMVersion(String rmVersion) {
|
||||||
|
maybeInitBuilder();
|
||||||
|
if (rmVersion == null) {
|
||||||
|
builder.clearRmIdentifier();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
builder.setRmVersion(rmVersion);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public NodeAction getNodeAction() {
|
public NodeAction getNodeAction() {
|
||||||
RegisterNodeManagerResponseProtoOrBuilder p = viaProto ? proto : builder;
|
RegisterNodeManagerResponseProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
|
@ -29,6 +29,7 @@ message RegisterNodeManagerRequestProto {
|
|||||||
optional NodeIdProto node_id = 1;
|
optional NodeIdProto node_id = 1;
|
||||||
optional int32 http_port = 3;
|
optional int32 http_port = 3;
|
||||||
optional ResourceProto resource = 4;
|
optional ResourceProto resource = 4;
|
||||||
|
optional string nm_version = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
message RegisterNodeManagerResponseProto {
|
message RegisterNodeManagerResponseProto {
|
||||||
@ -37,6 +38,7 @@ message RegisterNodeManagerResponseProto {
|
|||||||
optional NodeActionProto nodeAction = 3;
|
optional NodeActionProto nodeAction = 3;
|
||||||
optional int64 rm_identifier = 4;
|
optional int64 rm_identifier = 4;
|
||||||
optional string diagnostics_message = 5;
|
optional string diagnostics_message = 5;
|
||||||
|
optional string rm_version = 6;
|
||||||
}
|
}
|
||||||
|
|
||||||
message NodeHeartbeatRequestProto {
|
message NodeHeartbeatRequestProto {
|
||||||
@ -45,7 +47,6 @@ message NodeHeartbeatRequestProto {
|
|||||||
optional MasterKeyProto last_known_nm_token_master_key = 3;
|
optional MasterKeyProto last_known_nm_token_master_key = 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
message NodeHeartbeatResponseProto {
|
message NodeHeartbeatResponseProto {
|
||||||
optional int32 response_id = 1;
|
optional int32 response_id = 1;
|
||||||
optional MasterKeyProto container_token_master_key = 2;
|
optional MasterKeyProto container_token_master_key = 2;
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
import org.apache.hadoop.ipc.RPC;
|
import org.apache.hadoop.ipc.RPC;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.service.AbstractService;
|
import org.apache.hadoop.service.AbstractService;
|
||||||
|
import org.apache.hadoop.util.VersionUtil;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerState;
|
import org.apache.hadoop.yarn.api.records.ContainerState;
|
||||||
@ -63,6 +64,7 @@
|
|||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
||||||
|
import org.apache.hadoop.yarn.util.YarnVersionInfo;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
@ -84,6 +86,8 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
|||||||
private ResourceTracker resourceTracker;
|
private ResourceTracker resourceTracker;
|
||||||
private Resource totalResource;
|
private Resource totalResource;
|
||||||
private int httpPort;
|
private int httpPort;
|
||||||
|
private String nodeManagerVersionId;
|
||||||
|
private String minimumResourceManagerVersion;
|
||||||
private volatile boolean isStopped;
|
private volatile boolean isStopped;
|
||||||
private RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
|
private RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
|
||||||
private boolean tokenKeepAliveEnabled;
|
private boolean tokenKeepAliveEnabled;
|
||||||
@ -139,6 +143,10 @@ protected void serviceInit(Configuration conf) throws Exception {
|
|||||||
conf.getInt(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS,
|
conf.getInt(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS,
|
||||||
YarnConfiguration.DEFAULT_RM_NM_EXPIRY_INTERVAL_MS);
|
YarnConfiguration.DEFAULT_RM_NM_EXPIRY_INTERVAL_MS);
|
||||||
|
|
||||||
|
this.minimumResourceManagerVersion = conf.get(
|
||||||
|
YarnConfiguration.NM_RESOURCEMANAGER_MINIMUM_VERSION,
|
||||||
|
YarnConfiguration.DEFAULT_NM_RESOURCEMANAGER_MINIMUM_VERSION);
|
||||||
|
|
||||||
// Default duration to track stopped containers on nodemanager is 10Min.
|
// Default duration to track stopped containers on nodemanager is 10Min.
|
||||||
// This should not be assigned very large value as it will remember all the
|
// This should not be assigned very large value as it will remember all the
|
||||||
// containers stopped during that time.
|
// containers stopped during that time.
|
||||||
@ -168,6 +176,7 @@ protected void serviceStart() throws Exception {
|
|||||||
// NodeManager is the last service to start, so NodeId is available.
|
// NodeManager is the last service to start, so NodeId is available.
|
||||||
this.nodeId = this.context.getNodeId();
|
this.nodeId = this.context.getNodeId();
|
||||||
this.httpPort = this.context.getHttpPort();
|
this.httpPort = this.context.getHttpPort();
|
||||||
|
this.nodeManagerVersionId = YarnVersionInfo.getVersion();
|
||||||
try {
|
try {
|
||||||
// Registration has to be in start so that ContainerManager can get the
|
// Registration has to be in start so that ContainerManager can get the
|
||||||
// perNM tokens needed to authenticate ContainerTokens.
|
// perNM tokens needed to authenticate ContainerTokens.
|
||||||
@ -235,6 +244,7 @@ protected void registerWithRM() throws YarnException, IOException {
|
|||||||
request.setHttpPort(this.httpPort);
|
request.setHttpPort(this.httpPort);
|
||||||
request.setResource(this.totalResource);
|
request.setResource(this.totalResource);
|
||||||
request.setNodeId(this.nodeId);
|
request.setNodeId(this.nodeId);
|
||||||
|
request.setNMVersion(this.nodeManagerVersionId);
|
||||||
RegisterNodeManagerResponse regNMResponse =
|
RegisterNodeManagerResponse regNMResponse =
|
||||||
resourceTracker.registerNodeManager(request);
|
resourceTracker.registerNodeManager(request);
|
||||||
this.rmIdentifier = regNMResponse.getRMIdentifier();
|
this.rmIdentifier = regNMResponse.getRMIdentifier();
|
||||||
@ -248,6 +258,26 @@ protected void registerWithRM() throws YarnException, IOException {
|
|||||||
+ message);
|
+ message);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if ResourceManager version is too old then shutdown
|
||||||
|
if (!minimumResourceManagerVersion.equals("NONE")){
|
||||||
|
if (minimumResourceManagerVersion.equals("EqualToNM")){
|
||||||
|
minimumResourceManagerVersion = nodeManagerVersionId;
|
||||||
|
}
|
||||||
|
String rmVersion = regNMResponse.getRMVersion();
|
||||||
|
if (rmVersion == null) {
|
||||||
|
String message = "The Resource Manager's did not return a version. "
|
||||||
|
+ "Valid version cannot be checked.";
|
||||||
|
throw new YarnRuntimeException("Shutting down the Node Manager. "
|
||||||
|
+ message);
|
||||||
|
}
|
||||||
|
if (VersionUtil.compareVersions(rmVersion,minimumResourceManagerVersion) < 0) {
|
||||||
|
String message = "The Resource Manager's version ("
|
||||||
|
+ rmVersion +") is less than the minimum "
|
||||||
|
+ "allowed version " + minimumResourceManagerVersion;
|
||||||
|
throw new YarnRuntimeException("Shutting down the Node Manager on RM "
|
||||||
|
+ "version error, " + message);
|
||||||
|
}
|
||||||
|
}
|
||||||
MasterKey masterKey = regNMResponse.getContainerTokenMasterKey();
|
MasterKey masterKey = regNMResponse.getContainerTokenMasterKey();
|
||||||
// do this now so that its set before we start heartbeating to RM
|
// do this now so that its set before we start heartbeating to RM
|
||||||
// It is expected that status updater is started by this point and
|
// It is expected that status updater is started by this point and
|
||||||
|
@ -471,6 +471,7 @@ private class MyResourceTracker2 implements ResourceTracker {
|
|||||||
public NodeAction heartBeatNodeAction = NodeAction.NORMAL;
|
public NodeAction heartBeatNodeAction = NodeAction.NORMAL;
|
||||||
public NodeAction registerNodeAction = NodeAction.NORMAL;
|
public NodeAction registerNodeAction = NodeAction.NORMAL;
|
||||||
public String shutDownMessage = "";
|
public String shutDownMessage = "";
|
||||||
|
public String rmVersion = "3.0.1";
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public RegisterNodeManagerResponse registerNodeManager(
|
public RegisterNodeManagerResponse registerNodeManager(
|
||||||
@ -483,6 +484,7 @@ public RegisterNodeManagerResponse registerNodeManager(
|
|||||||
response.setContainerTokenMasterKey(createMasterKey());
|
response.setContainerTokenMasterKey(createMasterKey());
|
||||||
response.setNMTokenMasterKey(createMasterKey());
|
response.setNMTokenMasterKey(createMasterKey());
|
||||||
response.setDiagnosticsMessage(shutDownMessage);
|
response.setDiagnosticsMessage(shutDownMessage);
|
||||||
|
response.setRMVersion(rmVersion);
|
||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
@ -1180,6 +1182,44 @@ public void testNodeStatusUpdaterRetryAndNMShutdown()
|
|||||||
" connectionWaitSecs and RetryIntervalSecs", heartBeatID == 2);
|
" connectionWaitSecs and RetryIntervalSecs", heartBeatID == 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRMVersionLessThanMinimum() throws InterruptedException {
|
||||||
|
final AtomicInteger numCleanups = new AtomicInteger(0);
|
||||||
|
YarnConfiguration conf = createNMConfig();
|
||||||
|
conf.set(YarnConfiguration.NM_RESOURCEMANAGER_MINIMUM_VERSION, "3.0.0");
|
||||||
|
nm = new NodeManager() {
|
||||||
|
@Override
|
||||||
|
protected NodeStatusUpdater createNodeStatusUpdater(Context context,
|
||||||
|
Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
|
||||||
|
MyNodeStatusUpdater myNodeStatusUpdater = new MyNodeStatusUpdater(
|
||||||
|
context, dispatcher, healthChecker, metrics);
|
||||||
|
MyResourceTracker2 myResourceTracker2 = new MyResourceTracker2();
|
||||||
|
myResourceTracker2.heartBeatNodeAction = NodeAction.NORMAL;
|
||||||
|
myResourceTracker2.rmVersion = "3.0.0";
|
||||||
|
myNodeStatusUpdater.resourceTracker = myResourceTracker2;
|
||||||
|
return myNodeStatusUpdater;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void cleanupContainers(NodeManagerEventType eventType) {
|
||||||
|
super.cleanupContainers(NodeManagerEventType.SHUTDOWN);
|
||||||
|
numCleanups.incrementAndGet();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
nm.init(conf);
|
||||||
|
nm.start();
|
||||||
|
|
||||||
|
// NM takes a while to reach the STARTED state.
|
||||||
|
int waitCount = 0;
|
||||||
|
while (nm.getServiceState() != STATE.STARTED && waitCount++ != 20) {
|
||||||
|
LOG.info("Waiting for NM to stop..");
|
||||||
|
Thread.sleep(1000);
|
||||||
|
}
|
||||||
|
Assert.assertTrue(nm.getServiceState() == STATE.STARTED);
|
||||||
|
nm.stop();
|
||||||
|
}
|
||||||
|
|
||||||
private class MyNMContext extends NMContext {
|
private class MyNMContext extends NMContext {
|
||||||
ConcurrentMap<ContainerId, Container> containers =
|
ConcurrentMap<ContainerId, Container> containers =
|
||||||
new ConcurrentSkipListMap<ContainerId, Container>();
|
new ConcurrentSkipListMap<ContainerId, Container>();
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
import org.apache.hadoop.net.Node;
|
import org.apache.hadoop.net.Node;
|
||||||
import org.apache.hadoop.security.authorize.PolicyProvider;
|
import org.apache.hadoop.security.authorize.PolicyProvider;
|
||||||
import org.apache.hadoop.service.AbstractService;
|
import org.apache.hadoop.service.AbstractService;
|
||||||
|
import org.apache.hadoop.util.VersionUtil;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
@ -55,6 +56,7 @@
|
|||||||
import org.apache.hadoop.yarn.server.resourcemanager.security.authorize.RMPolicyProvider;
|
import org.apache.hadoop.yarn.server.resourcemanager.security.authorize.RMPolicyProvider;
|
||||||
import org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils;
|
import org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils;
|
||||||
import org.apache.hadoop.yarn.util.RackResolver;
|
import org.apache.hadoop.yarn.util.RackResolver;
|
||||||
|
import org.apache.hadoop.yarn.util.YarnVersionInfo;
|
||||||
|
|
||||||
public class ResourceTrackerService extends AbstractService implements
|
public class ResourceTrackerService extends AbstractService implements
|
||||||
ResourceTracker {
|
ResourceTracker {
|
||||||
@ -73,6 +75,7 @@ public class ResourceTrackerService extends AbstractService implements
|
|||||||
private long nextHeartBeatInterval;
|
private long nextHeartBeatInterval;
|
||||||
private Server server;
|
private Server server;
|
||||||
private InetSocketAddress resourceTrackerAddress;
|
private InetSocketAddress resourceTrackerAddress;
|
||||||
|
private String minimumNodeManagerVersion;
|
||||||
|
|
||||||
private static final NodeHeartbeatResponse resync = recordFactory
|
private static final NodeHeartbeatResponse resync = recordFactory
|
||||||
.newRecordInstance(NodeHeartbeatResponse.class);
|
.newRecordInstance(NodeHeartbeatResponse.class);
|
||||||
@ -99,6 +102,7 @@ public ResourceTrackerService(RMContext rmContext,
|
|||||||
this.nmLivelinessMonitor = nmLivelinessMonitor;
|
this.nmLivelinessMonitor = nmLivelinessMonitor;
|
||||||
this.containerTokenSecretManager = containerTokenSecretManager;
|
this.containerTokenSecretManager = containerTokenSecretManager;
|
||||||
this.nmTokenSecretManager = nmTokenSecretManager;
|
this.nmTokenSecretManager = nmTokenSecretManager;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -125,6 +129,10 @@ protected void serviceInit(Configuration conf) throws Exception {
|
|||||||
YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
|
YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES,
|
||||||
YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
|
YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES);
|
||||||
|
|
||||||
|
minimumNodeManagerVersion = conf.get(
|
||||||
|
YarnConfiguration.RM_NODEMANAGER_MINIMUM_VERSION,
|
||||||
|
YarnConfiguration.DEFAULT_RM_NODEMANAGER_MINIMUM_VERSION);
|
||||||
|
|
||||||
super.serviceInit(conf);
|
super.serviceInit(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -172,10 +180,30 @@ public RegisterNodeManagerResponse registerNodeManager(
|
|||||||
int cmPort = nodeId.getPort();
|
int cmPort = nodeId.getPort();
|
||||||
int httpPort = request.getHttpPort();
|
int httpPort = request.getHttpPort();
|
||||||
Resource capability = request.getResource();
|
Resource capability = request.getResource();
|
||||||
|
String nodeManagerVersion = request.getNMVersion();
|
||||||
|
|
||||||
RegisterNodeManagerResponse response = recordFactory
|
RegisterNodeManagerResponse response = recordFactory
|
||||||
.newRecordInstance(RegisterNodeManagerResponse.class);
|
.newRecordInstance(RegisterNodeManagerResponse.class);
|
||||||
|
|
||||||
|
if (!minimumNodeManagerVersion.equals("NONE")) {
|
||||||
|
if (minimumNodeManagerVersion.equals("EqualToRM")) {
|
||||||
|
minimumNodeManagerVersion = YarnVersionInfo.getVersion();
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((nodeManagerVersion == null) ||
|
||||||
|
(VersionUtil.compareVersions(nodeManagerVersion,minimumNodeManagerVersion)) < 0) {
|
||||||
|
String message =
|
||||||
|
"Disallowed NodeManager Version " + nodeManagerVersion
|
||||||
|
+ ", is less than the minimum version "
|
||||||
|
+ minimumNodeManagerVersion + " sending SHUTDOWN signal to "
|
||||||
|
+ "NodeManager.";
|
||||||
|
LOG.info(message);
|
||||||
|
response.setDiagnosticsMessage(message);
|
||||||
|
response.setNodeAction(NodeAction.SHUTDOWN);
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Check if this node is a 'valid' node
|
// Check if this node is a 'valid' node
|
||||||
if (!this.nodesListManager.isValidNode(host)) {
|
if (!this.nodesListManager.isValidNode(host)) {
|
||||||
String message =
|
String message =
|
||||||
@ -230,6 +258,7 @@ public RegisterNodeManagerResponse registerNodeManager(
|
|||||||
LOG.info(message);
|
LOG.info(message);
|
||||||
response.setNodeAction(NodeAction.NORMAL);
|
response.setNodeAction(NodeAction.NORMAL);
|
||||||
response.setRMIdentifier(ResourceManager.getClusterTimeStamp());
|
response.setRMIdentifier(ResourceManager.getClusterTimeStamp());
|
||||||
|
response.setRMVersion(YarnVersionInfo.getVersion());
|
||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,6 +58,7 @@
|
|||||||
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
|
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
|
||||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||||
|
import org.apache.hadoop.yarn.util.YarnVersionInfo;
|
||||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||||
|
|
||||||
@Private
|
@Private
|
||||||
@ -96,9 +97,9 @@ public NodeManager(String hostName, int containerManagerPort, int httpPort,
|
|||||||
RegisterNodeManagerRequest request = recordFactory
|
RegisterNodeManagerRequest request = recordFactory
|
||||||
.newRecordInstance(RegisterNodeManagerRequest.class);
|
.newRecordInstance(RegisterNodeManagerRequest.class);
|
||||||
request.setHttpPort(httpPort);
|
request.setHttpPort(httpPort);
|
||||||
request.setNodeId(this.nodeId);
|
|
||||||
request.setResource(capability);
|
request.setResource(capability);
|
||||||
request.setNodeId(this.nodeId);
|
request.setNodeId(this.nodeId);
|
||||||
|
request.setNMVersion(YarnVersionInfo.getVersion());
|
||||||
resourceTrackerService.registerNodeManager(request);
|
resourceTrackerService.registerNodeManager(request);
|
||||||
this.schedulerNode = new FiCaSchedulerNode(rmContext.getRMNodes().get(
|
this.schedulerNode = new FiCaSchedulerNode(rmContext.getRMNodes().get(
|
||||||
this.nodeId), false);
|
this.nodeId), false);
|
||||||
|
@ -46,6 +46,7 @@
|
|||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
|
||||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||||
import org.apache.hadoop.yarn.util.Records;
|
import org.apache.hadoop.yarn.util.Records;
|
||||||
|
import org.apache.hadoop.yarn.util.YarnVersionInfo;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
@ -248,6 +249,59 @@ public void testAddNewExcludePathToConfiguration() throws Exception {
|
|||||||
checkDecommissionedNMCount(rm, ++initialMetricCount);
|
checkDecommissionedNMCount(rm, ++initialMetricCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNodeRegistrationSuccess() throws Exception {
|
||||||
|
writeToHostsFile("host2");
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile
|
||||||
|
.getAbsolutePath());
|
||||||
|
rm = new MockRM(conf);
|
||||||
|
rm.start();
|
||||||
|
|
||||||
|
ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService();
|
||||||
|
RegisterNodeManagerRequest req = Records.newRecord(
|
||||||
|
RegisterNodeManagerRequest.class);
|
||||||
|
NodeId nodeId = NodeId.newInstance("host2", 1234);
|
||||||
|
Resource capability = BuilderUtils.newResource(1024, 1);
|
||||||
|
req.setResource(capability);
|
||||||
|
req.setNodeId(nodeId);
|
||||||
|
req.setHttpPort(1234);
|
||||||
|
req.setNMVersion(YarnVersionInfo.getVersion());
|
||||||
|
// trying to register a invalid node.
|
||||||
|
RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(req);
|
||||||
|
Assert.assertEquals(NodeAction.NORMAL,response.getNodeAction());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNodeRegistrationVersionLessThanRM() throws Exception {
|
||||||
|
writeToHostsFile("host2");
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile
|
||||||
|
.getAbsolutePath());
|
||||||
|
conf.set(YarnConfiguration.RM_NODEMANAGER_MINIMUM_VERSION,"EqualToRM" );
|
||||||
|
rm = new MockRM(conf);
|
||||||
|
rm.start();
|
||||||
|
String nmVersion = "1.9.9";
|
||||||
|
|
||||||
|
ResourceTrackerService resourceTrackerService = rm.getResourceTrackerService();
|
||||||
|
RegisterNodeManagerRequest req = Records.newRecord(
|
||||||
|
RegisterNodeManagerRequest.class);
|
||||||
|
NodeId nodeId = NodeId.newInstance("host2", 1234);
|
||||||
|
Resource capability = BuilderUtils.newResource(1024, 1);
|
||||||
|
req.setResource(capability);
|
||||||
|
req.setNodeId(nodeId);
|
||||||
|
req.setHttpPort(1234);
|
||||||
|
req.setNMVersion(nmVersion);
|
||||||
|
// trying to register a invalid node.
|
||||||
|
RegisterNodeManagerResponse response = resourceTrackerService.registerNodeManager(req);
|
||||||
|
Assert.assertEquals(NodeAction.SHUTDOWN,response.getNodeAction());
|
||||||
|
Assert.assertTrue("Diagnostic message did not contain: 'Disallowed NodeManager " +
|
||||||
|
"Version "+ nmVersion + ", is less than the minimum version'",
|
||||||
|
response.getDiagnosticsMessage().contains("Disallowed NodeManager Version " +
|
||||||
|
nmVersion + ", is less than the minimum version "));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNodeRegistrationFailure() throws Exception {
|
public void testNodeRegistrationFailure() throws Exception {
|
||||||
writeToHostsFile("host1");
|
writeToHostsFile("host1");
|
||||||
|
Loading…
Reference in New Issue
Block a user