YARN-9425. Make initialDelay configurable for FederationStateStoreService#scheduledExecutorService (#4731). Contributed by groot and Shen Yinjie.
Signed-off-by: Ayush Saxena <ayushsaxena@apache.org>
This commit is contained in:
parent
7f176d080c
commit
c294a414b9
@ -3920,6 +3920,13 @@ public static boolean isAclEnabled(Configuration conf) {
|
|||||||
public static final String DEFAULT_FEDERATION_REGISTRY_BASE_KEY =
|
public static final String DEFAULT_FEDERATION_REGISTRY_BASE_KEY =
|
||||||
"yarnfederation/";
|
"yarnfederation/";
|
||||||
|
|
||||||
|
public static final String FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY =
|
||||||
|
FEDERATION_PREFIX + "state-store.heartbeat.initial-delay";
|
||||||
|
|
||||||
|
// 30 secs
|
||||||
|
public static final int
|
||||||
|
DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY = 30;
|
||||||
|
|
||||||
public static final String FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS =
|
public static final String FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS =
|
||||||
FEDERATION_PREFIX + "state-store.heartbeat-interval-secs";
|
FEDERATION_PREFIX + "state-store.heartbeat-interval-secs";
|
||||||
|
|
||||||
|
@ -3624,6 +3624,16 @@
|
|||||||
<name>yarn.federation.enabled</name>
|
<name>yarn.federation.enabled</name>
|
||||||
<value>false</value>
|
<value>false</value>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<description>
|
||||||
|
Initial delay for federation state-store heartbeat service. Value is followed by a unit
|
||||||
|
specifier: ns, us, ms, s, m, h, d for nanoseconds, microseconds, milliseconds, seconds,
|
||||||
|
minutes, hours, days respectively. Values should provide units,
|
||||||
|
but seconds are assumed
|
||||||
|
</description>
|
||||||
|
<name>yarn.federation.state-store.heartbeat.initial-delay</name>
|
||||||
|
<value>30s</value>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<description>
|
<description>
|
||||||
Machine list file to be loaded by the FederationSubCluster Resolver
|
Machine list file to be loaded by the FederationSubCluster Resolver
|
||||||
|
@ -96,6 +96,7 @@ public class FederationStateStoreService extends AbstractService
|
|||||||
private FederationStateStore stateStoreClient = null;
|
private FederationStateStore stateStoreClient = null;
|
||||||
private SubClusterId subClusterId;
|
private SubClusterId subClusterId;
|
||||||
private long heartbeatInterval;
|
private long heartbeatInterval;
|
||||||
|
private long heartbeatInitialDelay;
|
||||||
private RMContext rmContext;
|
private RMContext rmContext;
|
||||||
|
|
||||||
public FederationStateStoreService(RMContext rmContext) {
|
public FederationStateStoreService(RMContext rmContext) {
|
||||||
@ -126,10 +127,24 @@ protected void serviceInit(Configuration conf) throws Exception {
|
|||||||
heartbeatInterval = conf.getLong(
|
heartbeatInterval = conf.getLong(
|
||||||
YarnConfiguration.FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS,
|
YarnConfiguration.FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS,
|
||||||
YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS);
|
YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS);
|
||||||
|
|
||||||
if (heartbeatInterval <= 0) {
|
if (heartbeatInterval <= 0) {
|
||||||
heartbeatInterval =
|
heartbeatInterval =
|
||||||
YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS;
|
YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INTERVAL_SECS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
heartbeatInitialDelay = conf.getTimeDuration(
|
||||||
|
YarnConfiguration.FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY,
|
||||||
|
YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY,
|
||||||
|
TimeUnit.SECONDS);
|
||||||
|
|
||||||
|
if (heartbeatInitialDelay <= 0) {
|
||||||
|
LOG.warn("{} configured value is wrong, must be > 0; using default value of {}",
|
||||||
|
YarnConfiguration.FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY,
|
||||||
|
YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY);
|
||||||
|
heartbeatInitialDelay =
|
||||||
|
YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY;
|
||||||
|
}
|
||||||
LOG.info("Initialized federation membership service.");
|
LOG.info("Initialized federation membership service.");
|
||||||
|
|
||||||
super.serviceInit(conf);
|
super.serviceInit(conf);
|
||||||
@ -206,9 +221,9 @@ private void registerAndInitializeHeartbeat() {
|
|||||||
scheduledExecutorService =
|
scheduledExecutorService =
|
||||||
HadoopExecutors.newSingleThreadScheduledExecutor();
|
HadoopExecutors.newSingleThreadScheduledExecutor();
|
||||||
scheduledExecutorService.scheduleWithFixedDelay(stateStoreHeartbeat,
|
scheduledExecutorService.scheduleWithFixedDelay(stateStoreHeartbeat,
|
||||||
heartbeatInterval, heartbeatInterval, TimeUnit.SECONDS);
|
heartbeatInitialDelay, heartbeatInterval, TimeUnit.SECONDS);
|
||||||
LOG.info("Started federation membership heartbeat with interval: {}",
|
LOG.info("Started federation membership heartbeat with interval: {} and initial delay: {}",
|
||||||
heartbeatInterval);
|
heartbeatInterval, heartbeatInitialDelay);
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.ha.HAServiceProtocol;
|
import org.apache.hadoop.ha.HAServiceProtocol;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.server.federation.store.FederationStateStore;
|
import org.apache.hadoop.yarn.server.federation.store.FederationStateStore;
|
||||||
@ -173,4 +174,37 @@ private String checkSubClusterInfo(SubClusterState state)
|
|||||||
return response.getCapability();
|
return response.getCapability();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFederationStateStoreServiceInitialHeartbeatDelay() throws Exception {
|
||||||
|
conf.setBoolean(YarnConfiguration.FEDERATION_ENABLED, true);
|
||||||
|
conf.setInt(YarnConfiguration.FEDERATION_STATESTORE_HEARTBEAT_INITIAL_DELAY, 10);
|
||||||
|
conf.set(YarnConfiguration.RM_CLUSTER_ID, subClusterId.getId());
|
||||||
|
|
||||||
|
GenericTestUtils.LogCapturer logCapture =
|
||||||
|
GenericTestUtils.LogCapturer.captureLogs(FederationStateStoreService.LOG);
|
||||||
|
|
||||||
|
final MockRM rm = new MockRM(conf);
|
||||||
|
|
||||||
|
// Initially there should be no entry for the sub-cluster
|
||||||
|
rm.init(conf);
|
||||||
|
stateStore = rm.getFederationStateStoreService().getStateStoreClient();
|
||||||
|
GetSubClusterInfoResponse response = stateStore.getSubCluster(request);
|
||||||
|
Assert.assertNull(response);
|
||||||
|
|
||||||
|
// Validate if sub-cluster is registered
|
||||||
|
rm.start();
|
||||||
|
String capability = checkSubClusterInfo(SubClusterState.SC_NEW);
|
||||||
|
Assert.assertTrue(capability.isEmpty());
|
||||||
|
|
||||||
|
// Heartbeat to see if sub-cluster transitions to running
|
||||||
|
FederationStateStoreHeartbeat storeHeartbeat =
|
||||||
|
rm.getFederationStateStoreService().getStateStoreHeartbeatThread();
|
||||||
|
storeHeartbeat.run();
|
||||||
|
capability = checkSubClusterInfo(SubClusterState.SC_RUNNING);
|
||||||
|
checkClusterMetricsInfo(capability, 0);
|
||||||
|
|
||||||
|
Assert.assertTrue(logCapture.getOutput().contains(
|
||||||
|
"Started federation membership heartbeat with interval: 300 and initial delay: 10"));
|
||||||
|
rm.stop();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user