HDFS-16876: Changes cleanup of shared RouterStateIdContext to be driven by namenodeResolver data. (#5282)
This commit is contained in:
parent
9c6eeb699e
commit
b56d483258
@ -77,10 +77,6 @@ public class ConnectionManager {
|
|||||||
* Global federated namespace context for router.
|
* Global federated namespace context for router.
|
||||||
*/
|
*/
|
||||||
private final RouterStateIdContext routerStateIdContext;
|
private final RouterStateIdContext routerStateIdContext;
|
||||||
/**
|
|
||||||
* Map from connection pool ID to namespace.
|
|
||||||
*/
|
|
||||||
private final Map<ConnectionPoolId, String> connectionPoolToNamespaceMap;
|
|
||||||
/** Max size of queue for creating new connections. */
|
/** Max size of queue for creating new connections. */
|
||||||
private final int creatorQueueMaxSize;
|
private final int creatorQueueMaxSize;
|
||||||
|
|
||||||
@ -105,7 +101,6 @@ public ConnectionManager(Configuration config) {
|
|||||||
public ConnectionManager(Configuration config, RouterStateIdContext routerStateIdContext) {
|
public ConnectionManager(Configuration config, RouterStateIdContext routerStateIdContext) {
|
||||||
this.conf = config;
|
this.conf = config;
|
||||||
this.routerStateIdContext = routerStateIdContext;
|
this.routerStateIdContext = routerStateIdContext;
|
||||||
this.connectionPoolToNamespaceMap = new HashMap<>();
|
|
||||||
// Configure minimum, maximum and active connection pools
|
// Configure minimum, maximum and active connection pools
|
||||||
this.maxSize = this.conf.getInt(
|
this.maxSize = this.conf.getInt(
|
||||||
RBFConfigKeys.DFS_ROUTER_NAMENODE_CONNECTION_POOL_SIZE,
|
RBFConfigKeys.DFS_ROUTER_NAMENODE_CONNECTION_POOL_SIZE,
|
||||||
@ -172,10 +167,6 @@ public void close() {
|
|||||||
pool.close();
|
pool.close();
|
||||||
}
|
}
|
||||||
this.pools.clear();
|
this.pools.clear();
|
||||||
for (String nsID: connectionPoolToNamespaceMap.values()) {
|
|
||||||
routerStateIdContext.removeNamespaceStateId(nsID);
|
|
||||||
}
|
|
||||||
connectionPoolToNamespaceMap.clear();
|
|
||||||
} finally {
|
} finally {
|
||||||
writeLock.unlock();
|
writeLock.unlock();
|
||||||
}
|
}
|
||||||
@ -224,7 +215,6 @@ public ConnectionContext getConnection(UserGroupInformation ugi,
|
|||||||
this.minActiveRatio, protocol,
|
this.minActiveRatio, protocol,
|
||||||
new PoolAlignmentContext(this.routerStateIdContext, nsId));
|
new PoolAlignmentContext(this.routerStateIdContext, nsId));
|
||||||
this.pools.put(connectionId, pool);
|
this.pools.put(connectionId, pool);
|
||||||
this.connectionPoolToNamespaceMap.put(connectionId, nsId);
|
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
writeLock.unlock();
|
writeLock.unlock();
|
||||||
@ -451,11 +441,6 @@ public void run() {
|
|||||||
try {
|
try {
|
||||||
for (ConnectionPoolId poolId : toRemove) {
|
for (ConnectionPoolId poolId : toRemove) {
|
||||||
pools.remove(poolId);
|
pools.remove(poolId);
|
||||||
String nsID = connectionPoolToNamespaceMap.get(poolId);
|
|
||||||
connectionPoolToNamespaceMap.remove(poolId);
|
|
||||||
if (!connectionPoolToNamespaceMap.values().contains(nsID)) {
|
|
||||||
routerStateIdContext.removeNamespaceStateId(nsID);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
writeLock.unlock();
|
writeLock.unlock();
|
||||||
|
@ -53,6 +53,7 @@
|
|||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
import java.util.concurrent.ThreadFactory;
|
import java.util.concurrent.ThreadFactory;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.hdfs.HAUtil;
|
import org.apache.hadoop.hdfs.HAUtil;
|
||||||
@ -413,9 +414,38 @@ public RouterRpcServer(Configuration conf, Router router,
|
|||||||
.forEach(this.dnCache::refresh),
|
.forEach(this.dnCache::refresh),
|
||||||
0,
|
0,
|
||||||
dnCacheExpire, TimeUnit.MILLISECONDS);
|
dnCacheExpire, TimeUnit.MILLISECONDS);
|
||||||
|
|
||||||
|
Executors
|
||||||
|
.newSingleThreadScheduledExecutor()
|
||||||
|
.scheduleWithFixedDelay(this::clearStaleNamespacesInRouterStateIdContext,
|
||||||
|
0,
|
||||||
|
conf.getLong(RBFConfigKeys.FEDERATION_STORE_MEMBERSHIP_EXPIRATION_MS,
|
||||||
|
RBFConfigKeys.FEDERATION_STORE_MEMBERSHIP_EXPIRATION_MS_DEFAULT),
|
||||||
|
TimeUnit.MILLISECONDS);
|
||||||
|
|
||||||
initRouterFedRename();
|
initRouterFedRename();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear expired namespace in the shared RouterStateIdContext.
|
||||||
|
*/
|
||||||
|
private void clearStaleNamespacesInRouterStateIdContext() {
|
||||||
|
try {
|
||||||
|
final Set<String> resolvedNamespaces = namenodeResolver.getNamespaces()
|
||||||
|
.stream()
|
||||||
|
.map(FederationNamespaceInfo::getNameserviceId)
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
|
||||||
|
routerStateIdContext.getNamespaces().forEach(namespace -> {
|
||||||
|
if (!resolvedNamespaces.contains(namespace)) {
|
||||||
|
routerStateIdContext.removeNamespaceStateId(namespace);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.warn("Could not fetch current list of namespaces.", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Init the router federation rename environment. Each router has its own
|
* Init the router federation rename environment. Each router has its own
|
||||||
* journal path.
|
* journal path.
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.atomic.LongAccumulator;
|
import java.util.concurrent.atomic.LongAccumulator;
|
||||||
@ -92,6 +93,10 @@ public LongAccumulator getNamespaceStateId(String nsId) {
|
|||||||
return namespaceIdMap.computeIfAbsent(nsId, key -> new LongAccumulator(Math::max, Long.MIN_VALUE));
|
return namespaceIdMap.computeIfAbsent(nsId, key -> new LongAccumulator(Math::max, Long.MIN_VALUE));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<String> getNamespaces() {
|
||||||
|
return Collections.list(namespaceIdMap.keys());
|
||||||
|
}
|
||||||
|
|
||||||
public void removeNamespaceStateId(String nsId) {
|
public void removeNamespaceStateId(String nsId) {
|
||||||
namespaceIdMap.remove(nsId);
|
namespaceIdMap.remove(nsId);
|
||||||
}
|
}
|
||||||
|
@ -25,7 +25,6 @@
|
|||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMESERVICE_ID;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMESERVICE_ID;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_STATE_CONTEXT_ENABLED_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_STATE_CONTEXT_ENABLED_KEY;
|
||||||
import static org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys.DFS_ROUTER_MONITOR_NAMENODE;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@ -43,6 +42,7 @@
|
|||||||
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.RouterFederatedStateProto;
|
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.RouterFederatedStateProto;
|
||||||
import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster;
|
import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster;
|
||||||
import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.RouterContext;
|
import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.RouterContext;
|
||||||
|
import org.apache.hadoop.hdfs.server.federation.MockResolver;
|
||||||
import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder;
|
import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder;
|
||||||
import org.apache.hadoop.hdfs.server.federation.StateStoreDFSCluster;
|
import org.apache.hadoop.hdfs.server.federation.StateStoreDFSCluster;
|
||||||
import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeContext;
|
import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeContext;
|
||||||
@ -359,7 +359,7 @@ public void testMultipleObserverRouter() throws Exception {
|
|||||||
}
|
}
|
||||||
sb.append(suffix);
|
sb.append(suffix);
|
||||||
}
|
}
|
||||||
routerConf.set(DFS_ROUTER_MONITOR_NAMENODE, sb.toString());
|
routerConf.set(RBFConfigKeys.DFS_ROUTER_MONITOR_NAMENODE, sb.toString());
|
||||||
routerConf.setBoolean(RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_DEFAULT_KEY, true);
|
routerConf.setBoolean(RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_DEFAULT_KEY, true);
|
||||||
routerConf.setBoolean(DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY, true);
|
routerConf.setBoolean(DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY, true);
|
||||||
routerConf.set(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, "0ms");
|
routerConf.set(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, "0ms");
|
||||||
@ -564,4 +564,79 @@ public void testStateIdProgressionInRouter() throws Exception {
|
|||||||
LongAccumulator namespaceStateId = routerStateIdContext.getNamespaceStateId("ns0");
|
LongAccumulator namespaceStateId = routerStateIdContext.getNamespaceStateId("ns0");
|
||||||
assertEquals("Router's shared should have progressed.", 21, namespaceStateId.get());
|
assertEquals("Router's shared should have progressed.", 21, namespaceStateId.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@Tag(SKIP_BEFORE_EACH_CLUSTER_STARTUP)
|
||||||
|
public void testSharedStateInRouterStateIdContext() throws Exception {
|
||||||
|
Path rootPath = new Path("/");
|
||||||
|
long cleanupPeriodMs = 1000;
|
||||||
|
|
||||||
|
Configuration conf = new Configuration(false);
|
||||||
|
conf.setLong(RBFConfigKeys.DFS_ROUTER_NAMENODE_CONNECTION_POOL_CLEAN, cleanupPeriodMs);
|
||||||
|
conf.setLong(RBFConfigKeys.DFS_ROUTER_NAMENODE_CONNECTION_CLEAN_MS, cleanupPeriodMs / 10);
|
||||||
|
startUpCluster(1, conf);
|
||||||
|
fileSystem = routerContext.getFileSystem(getConfToEnableObserverReads());
|
||||||
|
RouterStateIdContext routerStateIdContext = routerContext.getRouterRpcServer()
|
||||||
|
.getRouterStateIdContext();
|
||||||
|
|
||||||
|
// First read goes to active and creates connection pool for this user to active
|
||||||
|
fileSystem.listStatus(rootPath);
|
||||||
|
// Second read goes to observer and creates connection pool for this user to observer
|
||||||
|
fileSystem.listStatus(rootPath);
|
||||||
|
// Get object storing state of the namespace in the shared RouterStateIdContext
|
||||||
|
LongAccumulator namespaceStateId1 = routerStateIdContext.getNamespaceStateId("ns0");
|
||||||
|
|
||||||
|
// Wait for connection pools to expire and be cleaned up.
|
||||||
|
Thread.sleep(cleanupPeriodMs * 2);
|
||||||
|
|
||||||
|
// Third read goes to observer.
|
||||||
|
// New connection pool to observer is created since existing one expired.
|
||||||
|
fileSystem.listStatus(rootPath);
|
||||||
|
fileSystem.close();
|
||||||
|
// Get object storing state of the namespace in the shared RouterStateIdContext
|
||||||
|
LongAccumulator namespaceStateId2 = routerStateIdContext.getNamespaceStateId("ns0");
|
||||||
|
|
||||||
|
long rpcCountForActive = routerContext.getRouter().getRpcServer()
|
||||||
|
.getRPCMetrics().getActiveProxyOps();
|
||||||
|
long rpcCountForObserver = routerContext.getRouter().getRpcServer()
|
||||||
|
.getRPCMetrics().getObserverProxyOps();
|
||||||
|
|
||||||
|
// First list status goes to active
|
||||||
|
assertEquals("One call should be sent to active", 1, rpcCountForActive);
|
||||||
|
// Last two listStatuses go to observer.
|
||||||
|
assertEquals("Two calls should be sent to observer", 2, rpcCountForObserver);
|
||||||
|
|
||||||
|
Assertions.assertSame(namespaceStateId1, namespaceStateId2,
|
||||||
|
"The same object should be used in the shared RouterStateIdContext");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@Tag(SKIP_BEFORE_EACH_CLUSTER_STARTUP)
|
||||||
|
public void testRouterStateIdContextCleanup() throws Exception {
|
||||||
|
Path rootPath = new Path("/");
|
||||||
|
long recordExpiry = TimeUnit.SECONDS.toMillis(1);
|
||||||
|
|
||||||
|
Configuration confOverride = new Configuration(false);
|
||||||
|
confOverride.setLong(RBFConfigKeys.FEDERATION_STORE_MEMBERSHIP_EXPIRATION_MS, recordExpiry);
|
||||||
|
|
||||||
|
startUpCluster(1, confOverride);
|
||||||
|
fileSystem = routerContext.getFileSystem(getConfToEnableObserverReads());
|
||||||
|
RouterStateIdContext routerStateIdContext = routerContext.getRouterRpcServer()
|
||||||
|
.getRouterStateIdContext();
|
||||||
|
|
||||||
|
fileSystem.listStatus(rootPath);
|
||||||
|
List<String> namespace1 = routerStateIdContext.getNamespaces();
|
||||||
|
fileSystem.close();
|
||||||
|
|
||||||
|
MockResolver mockResolver = (MockResolver) routerContext.getRouter().getNamenodeResolver();
|
||||||
|
mockResolver.cleanRegistrations();
|
||||||
|
mockResolver.setDisableRegistration(true);
|
||||||
|
Thread.sleep(recordExpiry * 2);
|
||||||
|
|
||||||
|
List<String> namespace2 = routerStateIdContext.getNamespaces();
|
||||||
|
assertEquals(1, namespace1.size());
|
||||||
|
assertEquals("ns0", namespace1.get(0));
|
||||||
|
assertTrue(namespace2.isEmpty());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user