HDFS-17285. RBF: Add a safe mode check period configuration (#6347) Contributed by LiuGuH.

Reviewed-by: Inigo Goiri <inigoiri@apache.org>
Reviewed-by: Ayush Saxena <ayushsaxena@apache.org>
Signed-off-by: Shilun Fan <slfan1989@apache.org>
This commit is contained in:
LiuGuH 2023-12-21 12:21:59 +08:00 committed by GitHub
parent 5dd1977800
commit b4fed58c6a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 22 additions and 3 deletions

View File

@ -279,6 +279,10 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic {
FEDERATION_ROUTER_PREFIX + "safemode.expiration"; FEDERATION_ROUTER_PREFIX + "safemode.expiration";
public static final long DFS_ROUTER_SAFEMODE_EXPIRATION_DEFAULT = public static final long DFS_ROUTER_SAFEMODE_EXPIRATION_DEFAULT =
3 * DFS_ROUTER_CACHE_TIME_TO_LIVE_MS_DEFAULT; 3 * DFS_ROUTER_CACHE_TIME_TO_LIVE_MS_DEFAULT;
/**
 * Configuration key for how often the Router checks safe mode. The value is
 * read as a time duration, so it may carry a time unit suffix; a value
 * without a suffix is interpreted as milliseconds.
 */
public static final String DFS_ROUTER_SAFEMODE_CHECKPERIOD_MS =
FEDERATION_ROUTER_PREFIX + "safemode.checkperiod";
/** Default safe mode check period: 5 seconds, expressed in milliseconds. */
public static final long DFS_ROUTER_SAFEMODE_CHECKPERIOD_MS_DEFAULT =
TimeUnit.SECONDS.toMillis(5);
// HDFS Router-based federation mount table entries // HDFS Router-based federation mount table entries
/** Maximum number of cache entries to have. */ /** Maximum number of cache entries to have. */

View File

@ -133,8 +133,8 @@ public class RouterSafemodeService extends PeriodicService {
// Use same interval as cache update service // Use the dedicated safe mode check period
this.setIntervalMs(conf.getTimeDuration( this.setIntervalMs(conf.getTimeDuration(
RBFConfigKeys.DFS_ROUTER_CACHE_TIME_TO_LIVE_MS, RBFConfigKeys.DFS_ROUTER_SAFEMODE_CHECKPERIOD_MS,
RBFConfigKeys.DFS_ROUTER_CACHE_TIME_TO_LIVE_MS_DEFAULT, RBFConfigKeys.DFS_ROUTER_SAFEMODE_CHECKPERIOD_MS_DEFAULT,
TimeUnit.MILLISECONDS)); TimeUnit.MILLISECONDS));
this.startupInterval = conf.getTimeDuration( this.startupInterval = conf.getTimeDuration(

View File

@ -530,6 +530,17 @@
</description> </description>
</property> </property>
<property>
<name>dfs.federation.router.safemode.checkperiod</name>
<value>5s</value>
<description>
How often the Router should check safe mode. This
setting supports multiple time unit suffixes as described in
dfs.heartbeat.interval. If no suffix is specified, milliseconds are
assumed.
</description>
</property>
<property> <property>
<name>dfs.federation.router.monitor.namenode</name> <name>dfs.federation.router.monitor.namenode</name>
<value></value> <value></value>

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs.server.federation.router; package org.apache.hadoop.hdfs.server.federation.router;
import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_CACHE_TIME_TO_LIVE_MS; import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_CACHE_TIME_TO_LIVE_MS;
import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_SAFEMODE_CHECKPERIOD_MS;
import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_SAFEMODE_EXPIRATION; import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_SAFEMODE_EXPIRATION;
import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_SAFEMODE_EXTENSION; import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_SAFEMODE_EXTENSION;
import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.deleteStateStore; import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.deleteStateStore;
@ -70,6 +71,9 @@ public class TestRouterSafemode {
// 200 ms cache refresh // 200 ms cache refresh
conf.setTimeDuration(DFS_ROUTER_CACHE_TIME_TO_LIVE_MS, conf.setTimeDuration(DFS_ROUTER_CACHE_TIME_TO_LIVE_MS,
200, TimeUnit.MILLISECONDS); 200, TimeUnit.MILLISECONDS);
// 100 ms safemode checkperiod
conf.setTimeDuration(DFS_ROUTER_SAFEMODE_CHECKPERIOD_MS,
100, TimeUnit.MILLISECONDS);
// 1 sec post cache update before entering safemode (2 intervals) // 1 sec post cache update before entering safemode (2 intervals)
conf.setTimeDuration(DFS_ROUTER_SAFEMODE_EXPIRATION, conf.setTimeDuration(DFS_ROUTER_SAFEMODE_EXPIRATION,
TimeUnit.SECONDS.toMillis(1), TimeUnit.MILLISECONDS); TimeUnit.SECONDS.toMillis(1), TimeUnit.MILLISECONDS);
@ -133,7 +137,7 @@ public class TestRouterSafemode {
long interval = long interval =
conf.getTimeDuration(DFS_ROUTER_SAFEMODE_EXTENSION, conf.getTimeDuration(DFS_ROUTER_SAFEMODE_EXTENSION,
TimeUnit.SECONDS.toMillis(2), TimeUnit.MILLISECONDS) + TimeUnit.SECONDS.toMillis(2), TimeUnit.MILLISECONDS) +
conf.getTimeDuration(DFS_ROUTER_CACHE_TIME_TO_LIVE_MS, conf.getTimeDuration(DFS_ROUTER_SAFEMODE_CHECKPERIOD_MS,
TimeUnit.SECONDS.toMillis(1), TimeUnit.MILLISECONDS); TimeUnit.SECONDS.toMillis(1), TimeUnit.MILLISECONDS);
Thread.sleep(interval); Thread.sleep(interval);