HDFS-16848. RBF: Improve StateStoreZooKeeperImpl performance (#5147)

This commit is contained in:
sunhao 2023-02-05 09:33:05 +08:00 committed by GitHub
parent dad73b76c0
commit 0ae075a2af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 207 additions and 55 deletions

View File

@ -239,6 +239,18 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic {
public static final long public static final long
FEDERATION_STORE_ROUTER_EXPIRATION_DELETION_MS_DEFAULT = -1; FEDERATION_STORE_ROUTER_EXPIRATION_DELETION_MS_DEFAULT = -1;
// HDFS Router-based federation State Store ZK DRIVER
/** Common prefix of the configuration keys for the ZooKeeper State Store driver. */
public static final String FEDERATION_STORE_ZK_DRIVER_PREFIX =
RBFConfigKeys.FEDERATION_STORE_PREFIX + "driver.zk.";
/** Key for the parent znode path used by StateStoreZooKeeperImpl. */
public static final String FEDERATION_STORE_ZK_PARENT_PATH =
FEDERATION_STORE_ZK_DRIVER_PREFIX + "parent-path";
/** Default parent znode path. */
public static final String FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT =
"/hdfs-federation";
/** Key for the number of threads StateStoreZooKeeperImpl uses in async mode. */
public static final String FEDERATION_STORE_ZK_ASYNC_MAX_THREADS =
FEDERATION_STORE_ZK_DRIVER_PREFIX + "async.max.threads";
/**
 * Default thread count. StateStoreZooKeeperImpl only enables async mode for
 * values greater than zero, so -1 keeps the driver in sync mode.
 */
public static final int FEDERATION_STORE_ZK_ASYNC_MAX_THREADS_DEFAULT =
-1;
// HDFS Router safe mode // HDFS Router safe mode
public static final String DFS_ROUTER_SAFEMODE_ENABLE = public static final String DFS_ROUTER_SAFEMODE_ENABLE =
FEDERATION_ROUTER_PREFIX + "safemode.enable"; FEDERATION_ROUTER_PREFIX + "safemode.enable";

View File

@ -25,7 +25,16 @@
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.imps.CuratorFrameworkState; import org.apache.curator.framework.imps.CuratorFrameworkState;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
@ -57,14 +66,9 @@ public class StateStoreZooKeeperImpl extends StateStoreSerializableImpl {
private static final Logger LOG = private static final Logger LOG =
LoggerFactory.getLogger(StateStoreZooKeeperImpl.class); LoggerFactory.getLogger(StateStoreZooKeeperImpl.class);
/** Service to get/update zk state. */
/** Configuration keys. */ private ThreadPoolExecutor executorService;
public static final String FEDERATION_STORE_ZK_DRIVER_PREFIX = private boolean enableConcurrent;
RBFConfigKeys.FEDERATION_STORE_PREFIX + "driver.zk.";
public static final String FEDERATION_STORE_ZK_PARENT_PATH =
FEDERATION_STORE_ZK_DRIVER_PREFIX + "parent-path";
public static final String FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT =
"/hdfs-federation";
/** Directory to store the state store data. */ /** Directory to store the state store data. */
@ -82,8 +86,22 @@ public boolean initDriver() {
Configuration conf = getConf(); Configuration conf = getConf();
baseZNode = conf.get( baseZNode = conf.get(
FEDERATION_STORE_ZK_PARENT_PATH, RBFConfigKeys.FEDERATION_STORE_ZK_PARENT_PATH,
FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT); RBFConfigKeys.FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT);
int numThreads = conf.getInt(
RBFConfigKeys.FEDERATION_STORE_ZK_ASYNC_MAX_THREADS,
RBFConfigKeys.FEDERATION_STORE_ZK_ASYNC_MAX_THREADS_DEFAULT);
enableConcurrent = numThreads > 0;
if (enableConcurrent) {
ThreadFactory threadFactory = new ThreadFactoryBuilder()
.setNameFormat("StateStore ZK Client-%d")
.build();
this.executorService = new ThreadPoolExecutor(numThreads, numThreads,
0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<>(), threadFactory);
LOG.info("Init StateStoreZookeeperImpl by async mode with {} threads.", numThreads);
} else {
LOG.info("Init StateStoreZookeeperImpl by sync mode.");
}
try { try {
this.zkManager = new ZKCuratorManager(conf); this.zkManager = new ZKCuratorManager(conf);
this.zkManager.start(); this.zkManager.start();
@ -109,8 +127,16 @@ public <T extends BaseRecord> boolean initRecordStorage(
} }
} }
/**
 * Switch the driver between concurrent (async) and sequential (sync) mode.
 * Exposed for tests only.
 *
 * NOTE: the executor used in concurrent mode is only created by initDriver()
 * when the configured thread count is positive, so enabling concurrency on a
 * driver initialized in sync mode leaves it without an executor.
 *
 * @param enableConcurrent true to run ZooKeeper reads/writes through the
 *                         executor, false to run them one by one in the
 *                         calling thread.
 */
@VisibleForTesting
public void setEnableConcurrent(final boolean enableConcurrent) {
  this.enableConcurrent = enableConcurrent;
}
@Override @Override
public void close() throws Exception { public void close() throws Exception {
if (executorService != null) {
executorService.shutdown();
}
if (zkManager != null) { if (zkManager != null) {
zkManager.close(); zkManager.close();
} }
@ -136,35 +162,22 @@ public <T extends BaseRecord> QueryResult<T> get(Class<T> clazz)
List<T> ret = new ArrayList<>(); List<T> ret = new ArrayList<>();
String znode = getZNodeForClass(clazz); String znode = getZNodeForClass(clazz);
try { try {
List<String> children = zkManager.getChildren(znode); List<Callable<T>> callables = new ArrayList<>();
for (String child : children) { zkManager.getChildren(znode).forEach(c -> callables.add(() -> getRecord(clazz, znode, c)));
try { if (enableConcurrent) {
String path = getNodePath(znode, child); List<Future<T>> futures = executorService.invokeAll(callables);
Stat stat = new Stat(); for (Future<T> future : futures) {
String data = zkManager.getStringData(path, stat); if (future.get() != null) {
boolean corrupted = false; ret.add(future.get());
if (data == null || data.equals("")) { }
// All records should have data, otherwise this is corrupted }
corrupted = true;
} else { } else {
try { for (Callable<T> callable : callables) {
T record = createRecord(data, stat, clazz); T record = callable.call();
if (record != null) {
ret.add(record); ret.add(record);
} catch (IOException e) {
LOG.error("Cannot create record type \"{}\" from \"{}\": {}",
clazz.getSimpleName(), data, e.getMessage());
corrupted = true;
} }
} }
if (corrupted) {
LOG.error("Cannot get data for {} at {}, cleaning corrupted data",
child, path);
zkManager.delete(path);
}
} catch (Exception e) {
LOG.error("Cannot get data for {}: {}", child, e.getMessage());
}
} }
} catch (Exception e) { } catch (Exception e) {
getMetrics().addFailure(monotonicNow() - start); getMetrics().addFailure(monotonicNow() - start);
@ -178,6 +191,44 @@ public <T extends BaseRecord> QueryResult<T> get(Class<T> clazz)
return new QueryResult<T>(ret, getTime()); return new QueryResult<T>(ret, getTime());
} }
/**
 * Get one data record in the StateStore or delete it if it's corrupted.
 *
 * A znode counts as corrupted when it holds no data (null or empty string)
 * or when its data cannot be deserialized into a record; in both cases the
 * znode is deleted. Any failure is logged and never propagated, so callers
 * must be ready for a null result.
 *
 * @param <T> Type of the record.
 * @param clazz Record class to evaluate.
 * @param znode The ZNode for the class.
 * @param child The child for znode to get.
 * @return The record read from ZooKeeper, or null if the znode was
 *         corrupted or could not be read.
 */
private <T extends BaseRecord> T getRecord(Class<T> clazz, String znode, String child) {
  T record = null;
  try {
    String path = getNodePath(znode, child);
    Stat stat = new Stat();
    String data = zkManager.getStringData(path, stat);
    boolean corrupted = false;
    if (data == null || data.isEmpty()) {
      // All records should have data, otherwise this is corrupted
      corrupted = true;
    } else {
      try {
        record = createRecord(data, stat, clazz);
      } catch (IOException e) {
        LOG.error("Cannot create record type \"{}\" from \"{}\": {}",
            clazz.getSimpleName(), data, e.getMessage());
        corrupted = true;
      }
    }

    if (corrupted) {
      LOG.error("Cannot get data for {} at {}, cleaning corrupted data", child, path);
      zkManager.delete(path);
    }
  } catch (Exception e) {
    // Pass the throwable as the trailing argument so the stack trace is kept;
    // the message alone hides where the ZooKeeper access failed.
    LOG.error("Cannot get data for {}: {}", child, e.getMessage(), e);
  }
  return record;
}
@Override @Override
public <T extends BaseRecord> boolean putAll( public <T extends BaseRecord> boolean putAll(
List<T> records, boolean update, boolean error) throws IOException { List<T> records, boolean update, boolean error) throws IOException {
@ -192,22 +243,40 @@ public <T extends BaseRecord> boolean putAll(
String znode = getZNodeForClass(recordClass); String znode = getZNodeForClass(recordClass);
long start = monotonicNow(); long start = monotonicNow();
boolean status = true; final AtomicBoolean status = new AtomicBoolean(true);
for (T record : records) { List<Callable<Void>> callables = new ArrayList<>();
records.forEach(record ->
callables.add(
() -> {
String primaryKey = getPrimaryKey(record); String primaryKey = getPrimaryKey(record);
String recordZNode = getNodePath(znode, primaryKey); String recordZNode = getNodePath(znode, primaryKey);
byte[] data = serialize(record); byte[] data = serialize(record);
if (!writeNode(recordZNode, data, update, error)) { if (!writeNode(recordZNode, data, update, error)) {
status = false; status.set(false);
} }
return null;
}
)
);
try {
if (enableConcurrent) {
executorService.invokeAll(callables);
} else {
for(Callable<Void> callable : callables) {
callable.call();
}
}
} catch (Exception e) {
LOG.error("Write record failed : {}", e.getMessage(), e);
throw new IOException(e);
} }
long end = monotonicNow(); long end = monotonicNow();
if (status) { if (status.get()) {
getMetrics().addWrite(end - start); getMetrics().addWrite(end - start);
} else { } else {
getMetrics().addFailure(end - start); getMetrics().addFailure(end - start);
} }
return status; return status.get();
} }
@Override @Override

View File

@ -377,6 +377,26 @@
</description> </description>
</property> </property>
<property>
<name>dfs.federation.router.store.driver.zk.parent-path</name>
<value>/hdfs-federation</value>
<description>
The parent path of zookeeper for StateStoreZooKeeperImpl.
</description>
</property>
<property>
<name>dfs.federation.router.store.driver.zk.async.max.threads</name>
<value>-1</value>
<description>
Maximum number of threads used by StateStoreZooKeeperImpl in async mode.
Currently the only driver that supports this setting is
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl.
The default value is -1, which means StateStoreZooKeeperImpl works in sync mode.
Use a positive integer value to enable async mode.
</description>
</property>
<property> <property>
<name>dfs.federation.router.cache.ttl</name> <name>dfs.federation.router.cache.ttl</name>
<value>1m</value> <value>1m</value>

View File

@ -119,7 +119,7 @@ private <T extends Enum> T generateRandomEnum(Class<T> enumClass) {
} }
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
private <T extends BaseRecord> T generateFakeRecord(Class<T> recordClass) protected <T extends BaseRecord> T generateFakeRecord(Class<T> recordClass)
throws IllegalArgumentException, IllegalAccessException, IOException { throws IllegalArgumentException, IllegalAccessException, IOException {
if (recordClass == MembershipState.class) { if (recordClass == MembershipState.class) {

View File

@ -18,12 +18,13 @@
package org.apache.hadoop.hdfs.server.federation.store.driver; package org.apache.hadoop.hdfs.server.federation.store.driver;
import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.getStateStoreConfiguration; import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.getStateStoreConfiguration;
import static org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl.FEDERATION_STORE_ZK_PARENT_PATH;
import static org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl.FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull; import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.CuratorFramework;
@ -40,6 +41,7 @@
import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState; import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState;
import org.apache.hadoop.hdfs.server.federation.store.records.MountTable; import org.apache.hadoop.hdfs.server.federation.store.records.MountTable;
import org.apache.hadoop.hdfs.server.federation.store.records.RouterState; import org.apache.hadoop.hdfs.server.federation.store.records.RouterState;
import org.apache.hadoop.util.Time;
import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.CreateMode;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.Before; import org.junit.Before;
@ -73,9 +75,10 @@ public static void setupCluster() throws Exception {
// Disable auto-repair of connection // Disable auto-repair of connection
conf.setLong(RBFConfigKeys.FEDERATION_STORE_CONNECTION_TEST_MS, conf.setLong(RBFConfigKeys.FEDERATION_STORE_CONNECTION_TEST_MS,
TimeUnit.HOURS.toMillis(1)); TimeUnit.HOURS.toMillis(1));
conf.setInt(RBFConfigKeys.FEDERATION_STORE_ZK_ASYNC_MAX_THREADS, 10);
baseZNode = conf.get(FEDERATION_STORE_ZK_PARENT_PATH, baseZNode = conf.get(RBFConfigKeys.FEDERATION_STORE_ZK_PARENT_PATH,
FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT); RBFConfigKeys.FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT);
getStateStore(conf); getStateStore(conf);
} }
@ -91,6 +94,8 @@ public static void tearDownCluster() {
@Before @Before
public void startup() throws IOException { public void startup() throws IOException {
removeAll(getStateStoreDriver()); removeAll(getStateStoreDriver());
StateStoreZooKeeperImpl stateStoreZooKeeper = (StateStoreZooKeeperImpl) getStateStoreDriver();
stateStoreZooKeeper.setEnableConcurrent(false);
} }
private <T extends BaseRecord> String generateFakeZNode( private <T extends BaseRecord> String generateFakeZNode(
@ -126,33 +131,79 @@ private <T extends BaseRecord> void testGetNullRecord(
assertNull(curatorFramework.checkExists().forPath(znode)); assertNull(curatorFramework.checkExists().forPath(znode));
} }
/**
 * Compare the time needed to insert the same batch of mount table records in
 * sync mode and in async mode, expecting async mode to be faster.
 */
@Test
public void testAsyncPerformance() throws Exception {
  StateStoreZooKeeperImpl stateStoreDriver = (StateStoreZooKeeperImpl) getStateStoreDriver();
  List<MountTable> insertList = new ArrayList<>();
  for (int i = 0; i < 1000; i++) {
    MountTable newRecord = generateFakeRecord(MountTable.class);
    insertList.add(newRecord);
  }

  // Insert Multiple on sync mode
  // Use the monotonic clock: elapsed time must not be affected by wall-clock adjustments.
  long startSync = Time.monotonicNow();
  stateStoreDriver.putAll(insertList, true, false);
  long endSync = Time.monotonicNow();

  // Remove the records that were just inserted (MountTable, not MembershipState)
  // so the async run creates the znodes again instead of overwriting existing ones.
  stateStoreDriver.removeAll(MountTable.class);

  stateStoreDriver.setEnableConcurrent(true);
  // Insert Multiple on async mode
  long startAsync = Time.monotonicNow();
  stateStoreDriver.putAll(insertList, true, false);
  long endAsync = Time.monotonicNow();

  long syncElapsed = endSync - startSync;
  long asyncElapsed = endAsync - startAsync;
  assertTrue("Async insert (" + asyncElapsed + " ms) should be faster than sync insert ("
      + syncElapsed + " ms)", syncElapsed > asyncElapsed);
}
@Test @Test
public void testGetNullRecord() throws Exception { public void testGetNullRecord() throws Exception {
testGetNullRecord(getStateStoreDriver()); StateStoreZooKeeperImpl stateStoreDriver = (StateStoreZooKeeperImpl) getStateStoreDriver();
testGetNullRecord(stateStoreDriver);
// test async mode
stateStoreDriver.setEnableConcurrent(true);
testGetNullRecord(stateStoreDriver);
} }
@Test @Test
public void testInsert() public void testInsert()
throws IllegalArgumentException, IllegalAccessException, IOException { throws IllegalArgumentException, IllegalAccessException, IOException {
testInsert(getStateStoreDriver()); StateStoreZooKeeperImpl stateStoreDriver = (StateStoreZooKeeperImpl) getStateStoreDriver();
testInsert(stateStoreDriver);
// test async mode
stateStoreDriver.setEnableConcurrent(true);
testInsert(stateStoreDriver);
} }
@Test @Test
public void testUpdate() public void testUpdate()
throws IllegalArgumentException, ReflectiveOperationException, throws IllegalArgumentException, ReflectiveOperationException,
IOException, SecurityException { IOException, SecurityException {
testPut(getStateStoreDriver()); StateStoreZooKeeperImpl stateStoreDriver = (StateStoreZooKeeperImpl) getStateStoreDriver();
testPut(stateStoreDriver);
// test async mode
stateStoreDriver.setEnableConcurrent(true);
testPut(stateStoreDriver);
} }
@Test @Test
public void testDelete() public void testDelete()
throws IllegalArgumentException, IllegalAccessException, IOException { throws IllegalArgumentException, IllegalAccessException, IOException {
testRemove(getStateStoreDriver()); StateStoreZooKeeperImpl stateStoreDriver = (StateStoreZooKeeperImpl) getStateStoreDriver();
testRemove(stateStoreDriver);
// test async mode
stateStoreDriver.setEnableConcurrent(true);
testRemove(stateStoreDriver);
} }
@Test @Test
public void testFetchErrors() public void testFetchErrors()
throws IllegalArgumentException, IllegalAccessException, IOException { throws IllegalArgumentException, IllegalAccessException, IOException {
testFetchErrors(getStateStoreDriver()); StateStoreZooKeeperImpl stateStoreDriver = (StateStoreZooKeeperImpl) getStateStoreDriver();
testFetchErrors(stateStoreDriver);
// test async mode
stateStoreDriver.setEnableConcurrent(true);
testFetchErrors(stateStoreDriver);
} }
} }