HDFS-16848. RBF: Improve StateStoreZooKeeperImpl performance (#5147)

This commit is contained in:
sunhao 2023-02-05 09:33:05 +08:00 committed by GitHub
parent dad73b76c0
commit 0ae075a2af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 207 additions and 55 deletions

View File

@ -239,6 +239,18 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic {
public static final long
FEDERATION_STORE_ROUTER_EXPIRATION_DELETION_MS_DEFAULT = -1;
// HDFS Router-based federation State Store ZK DRIVER
public static final String FEDERATION_STORE_ZK_DRIVER_PREFIX =
RBFConfigKeys.FEDERATION_STORE_PREFIX + "driver.zk.";
public static final String FEDERATION_STORE_ZK_PARENT_PATH =
FEDERATION_STORE_ZK_DRIVER_PREFIX + "parent-path";
public static final String FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT =
"/hdfs-federation";
public static final String FEDERATION_STORE_ZK_ASYNC_MAX_THREADS =
FEDERATION_STORE_ZK_DRIVER_PREFIX + "async.max.threads";
public static final int FEDERATION_STORE_ZK_ASYNC_MAX_THREADS_DEFAULT =
-1;
// HDFS Router safe mode
public static final String DFS_ROUTER_SAFEMODE_ENABLE =
FEDERATION_ROUTER_PREFIX + "safemode.enable";

View File

@ -25,7 +25,16 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.imps.CuratorFrameworkState;
import org.apache.hadoop.conf.Configuration;
@ -57,14 +66,9 @@ public class StateStoreZooKeeperImpl extends StateStoreSerializableImpl {
private static final Logger LOG =
LoggerFactory.getLogger(StateStoreZooKeeperImpl.class);
/** Configuration keys. */
public static final String FEDERATION_STORE_ZK_DRIVER_PREFIX =
RBFConfigKeys.FEDERATION_STORE_PREFIX + "driver.zk.";
public static final String FEDERATION_STORE_ZK_PARENT_PATH =
FEDERATION_STORE_ZK_DRIVER_PREFIX + "parent-path";
public static final String FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT =
"/hdfs-federation";
/** Service to get/update zk state. */
private ThreadPoolExecutor executorService;
private boolean enableConcurrent;
/** Directory to store the state store data. */
@ -82,8 +86,22 @@ public boolean initDriver() {
Configuration conf = getConf();
baseZNode = conf.get(
FEDERATION_STORE_ZK_PARENT_PATH,
FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT);
RBFConfigKeys.FEDERATION_STORE_ZK_PARENT_PATH,
RBFConfigKeys.FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT);
int numThreads = conf.getInt(
RBFConfigKeys.FEDERATION_STORE_ZK_ASYNC_MAX_THREADS,
RBFConfigKeys.FEDERATION_STORE_ZK_ASYNC_MAX_THREADS_DEFAULT);
enableConcurrent = numThreads > 0;
if (enableConcurrent) {
ThreadFactory threadFactory = new ThreadFactoryBuilder()
.setNameFormat("StateStore ZK Client-%d")
.build();
this.executorService = new ThreadPoolExecutor(numThreads, numThreads,
0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<>(), threadFactory);
LOG.info("Init StateStoreZookeeperImpl by async mode with {} threads.", numThreads);
} else {
LOG.info("Init StateStoreZookeeperImpl by sync mode.");
}
try {
this.zkManager = new ZKCuratorManager(conf);
this.zkManager.start();
@ -109,8 +127,16 @@ public <T extends BaseRecord> boolean initRecordStorage(
}
}
@VisibleForTesting
public void setEnableConcurrent(boolean enableConcurrent) {
this.enableConcurrent = enableConcurrent;
}
@Override
public void close() throws Exception {
if (executorService != null) {
executorService.shutdown();
}
if (zkManager != null) {
zkManager.close();
}
@ -136,35 +162,22 @@ public <T extends BaseRecord> QueryResult<T> get(Class<T> clazz)
List<T> ret = new ArrayList<>();
String znode = getZNodeForClass(clazz);
try {
List<String> children = zkManager.getChildren(znode);
for (String child : children) {
try {
String path = getNodePath(znode, child);
Stat stat = new Stat();
String data = zkManager.getStringData(path, stat);
boolean corrupted = false;
if (data == null || data.equals("")) {
// All records should have data, otherwise this is corrupted
corrupted = true;
List<Callable<T>> callables = new ArrayList<>();
zkManager.getChildren(znode).forEach(c -> callables.add(() -> getRecord(clazz, znode, c)));
if (enableConcurrent) {
List<Future<T>> futures = executorService.invokeAll(callables);
for (Future<T> future : futures) {
if (future.get() != null) {
ret.add(future.get());
}
}
} else {
try {
T record = createRecord(data, stat, clazz);
for (Callable<T> callable : callables) {
T record = callable.call();
if (record != null) {
ret.add(record);
} catch (IOException e) {
LOG.error("Cannot create record type \"{}\" from \"{}\": {}",
clazz.getSimpleName(), data, e.getMessage());
corrupted = true;
}
}
if (corrupted) {
LOG.error("Cannot get data for {} at {}, cleaning corrupted data",
child, path);
zkManager.delete(path);
}
} catch (Exception e) {
LOG.error("Cannot get data for {}: {}", child, e.getMessage());
}
}
} catch (Exception e) {
getMetrics().addFailure(monotonicNow() - start);
@ -178,6 +191,44 @@ public <T extends BaseRecord> QueryResult<T> get(Class<T> clazz)
return new QueryResult<T>(ret, getTime());
}
/**
* Get one data record in the StateStore or delete it if it's corrupted.
*
* @param clazz Record class to evaluate.
* @param znode The ZNode for the class.
* @param child The child for znode to get.
* @return The record to get.
*/
private <T extends BaseRecord> T getRecord(Class<T> clazz, String znode, String child) {
T record = null;
try {
String path = getNodePath(znode, child);
Stat stat = new Stat();
String data = zkManager.getStringData(path, stat);
boolean corrupted = false;
if (data == null || data.equals("")) {
// All records should have data, otherwise this is corrupted
corrupted = true;
} else {
try {
record = createRecord(data, stat, clazz);
} catch (IOException e) {
LOG.error("Cannot create record type \"{}\" from \"{}\": {}",
clazz.getSimpleName(), data, e.getMessage());
corrupted = true;
}
}
if (corrupted) {
LOG.error("Cannot get data for {} at {}, cleaning corrupted data", child, path);
zkManager.delete(path);
}
} catch (Exception e) {
LOG.error("Cannot get data for {}: {}", child, e.getMessage());
}
return record;
}
@Override
public <T extends BaseRecord> boolean putAll(
List<T> records, boolean update, boolean error) throws IOException {
@ -192,22 +243,40 @@ public <T extends BaseRecord> boolean putAll(
String znode = getZNodeForClass(recordClass);
long start = monotonicNow();
boolean status = true;
for (T record : records) {
final AtomicBoolean status = new AtomicBoolean(true);
List<Callable<Void>> callables = new ArrayList<>();
records.forEach(record ->
callables.add(
() -> {
String primaryKey = getPrimaryKey(record);
String recordZNode = getNodePath(znode, primaryKey);
byte[] data = serialize(record);
if (!writeNode(recordZNode, data, update, error)){
status = false;
if (!writeNode(recordZNode, data, update, error)) {
status.set(false);
}
return null;
}
)
);
try {
if (enableConcurrent) {
executorService.invokeAll(callables);
} else {
for(Callable<Void> callable : callables) {
callable.call();
}
}
} catch (Exception e) {
LOG.error("Write record failed : {}", e.getMessage(), e);
throw new IOException(e);
}
long end = monotonicNow();
if (status) {
if (status.get()) {
getMetrics().addWrite(end - start);
} else {
getMetrics().addFailure(end - start);
}
return status;
return status.get();
}
@Override

View File

@ -377,6 +377,26 @@
</description>
</property>
<property>
<name>dfs.federation.router.store.driver.zk.parent-path</name>
<value>/hdfs-federation</value>
<description>
The parent path of zookeeper for StateStoreZooKeeperImpl.
</description>
</property>
<property>
<name>dfs.federation.router.store.driver.zk.async.max.threads</name>
<value>-1</value>
<description>
Max threads number of StateStoreZooKeeperImpl in async mode.
The only class currently being supported:
org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl.
Default value is -1, which means StateStoreZooKeeperImpl is working in sync mode.
Use positive integer value to enable async mode.
</description>
</property>
<property>
<name>dfs.federation.router.cache.ttl</name>
<value>1m</value>

View File

@ -119,7 +119,7 @@ private <T extends Enum> T generateRandomEnum(Class<T> enumClass) {
}
@SuppressWarnings("unchecked")
private <T extends BaseRecord> T generateFakeRecord(Class<T> recordClass)
protected <T extends BaseRecord> T generateFakeRecord(Class<T> recordClass)
throws IllegalArgumentException, IllegalAccessException, IOException {
if (recordClass == MembershipState.class) {

View File

@ -18,12 +18,13 @@
package org.apache.hadoop.hdfs.server.federation.store.driver;
import static org.apache.hadoop.hdfs.server.federation.store.FederationStateStoreTestUtils.getStateStoreConfiguration;
import static org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl.FEDERATION_STORE_ZK_PARENT_PATH;
import static org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl.FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.curator.framework.CuratorFramework;
@ -40,6 +41,7 @@
import org.apache.hadoop.hdfs.server.federation.store.records.MembershipState;
import org.apache.hadoop.hdfs.server.federation.store.records.MountTable;
import org.apache.hadoop.hdfs.server.federation.store.records.RouterState;
import org.apache.hadoop.util.Time;
import org.apache.zookeeper.CreateMode;
import org.junit.AfterClass;
import org.junit.Before;
@ -73,9 +75,10 @@ public static void setupCluster() throws Exception {
// Disable auto-repair of connection
conf.setLong(RBFConfigKeys.FEDERATION_STORE_CONNECTION_TEST_MS,
TimeUnit.HOURS.toMillis(1));
conf.setInt(RBFConfigKeys.FEDERATION_STORE_ZK_ASYNC_MAX_THREADS, 10);
baseZNode = conf.get(FEDERATION_STORE_ZK_PARENT_PATH,
FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT);
baseZNode = conf.get(RBFConfigKeys.FEDERATION_STORE_ZK_PARENT_PATH,
RBFConfigKeys.FEDERATION_STORE_ZK_PARENT_PATH_DEFAULT);
getStateStore(conf);
}
@ -91,6 +94,8 @@ public static void tearDownCluster() {
@Before
public void startup() throws IOException {
removeAll(getStateStoreDriver());
StateStoreZooKeeperImpl stateStoreZooKeeper = (StateStoreZooKeeperImpl) getStateStoreDriver();
stateStoreZooKeeper.setEnableConcurrent(false);
}
private <T extends BaseRecord> String generateFakeZNode(
@ -126,33 +131,79 @@ private <T extends BaseRecord> void testGetNullRecord(
assertNull(curatorFramework.checkExists().forPath(znode));
}
@Test
public void testAsyncPerformance() throws Exception {
StateStoreZooKeeperImpl stateStoreDriver = (StateStoreZooKeeperImpl) getStateStoreDriver();
List<MountTable> insertList = new ArrayList<>();
for (int i = 0; i < 1000; i++) {
MountTable newRecord = generateFakeRecord(MountTable.class);
insertList.add(newRecord);
}
// Insert Multiple on sync mode
long startSync = Time.now();
stateStoreDriver.putAll(insertList, true, false);
long endSync = Time.now();
stateStoreDriver.removeAll(MembershipState.class);
stateStoreDriver.setEnableConcurrent(true);
// Insert Multiple on async mode
long startAsync = Time.now();
stateStoreDriver.putAll(insertList, true, false);
long endAsync = Time.now();
assertTrue((endSync - startSync) > (endAsync - startAsync));
}
@Test
public void testGetNullRecord() throws Exception {
testGetNullRecord(getStateStoreDriver());
StateStoreZooKeeperImpl stateStoreDriver = (StateStoreZooKeeperImpl) getStateStoreDriver();
testGetNullRecord(stateStoreDriver);
// test async mode
stateStoreDriver.setEnableConcurrent(true);
testGetNullRecord(stateStoreDriver);
}
@Test
public void testInsert()
throws IllegalArgumentException, IllegalAccessException, IOException {
testInsert(getStateStoreDriver());
StateStoreZooKeeperImpl stateStoreDriver = (StateStoreZooKeeperImpl) getStateStoreDriver();
testInsert(stateStoreDriver);
// test async mode
stateStoreDriver.setEnableConcurrent(true);
testInsert(stateStoreDriver);
}
@Test
public void testUpdate()
throws IllegalArgumentException, ReflectiveOperationException,
IOException, SecurityException {
testPut(getStateStoreDriver());
StateStoreZooKeeperImpl stateStoreDriver = (StateStoreZooKeeperImpl) getStateStoreDriver();
testPut(stateStoreDriver);
// test async mode
stateStoreDriver.setEnableConcurrent(true);
testPut(stateStoreDriver);
}
@Test
public void testDelete()
throws IllegalArgumentException, IllegalAccessException, IOException {
testRemove(getStateStoreDriver());
StateStoreZooKeeperImpl stateStoreDriver = (StateStoreZooKeeperImpl) getStateStoreDriver();
testRemove(stateStoreDriver);
// test async mode
stateStoreDriver.setEnableConcurrent(true);
testRemove(stateStoreDriver);
}
@Test
public void testFetchErrors()
throws IllegalArgumentException, IllegalAccessException, IOException {
testFetchErrors(getStateStoreDriver());
StateStoreZooKeeperImpl stateStoreDriver = (StateStoreZooKeeperImpl) getStateStoreDriver();
testFetchErrors(stateStoreDriver);
// test async mode
stateStoreDriver.setEnableConcurrent(true);
testFetchErrors(stateStoreDriver);
}
}