HADOOP-16579. Upgrade to Curator 4.2.0 and ZooKeeper 3.5.5 (#1656). Contributed by Norbert Kalmár, Mate Szalay-Beko

* HADOOP-16579 - Upgrade to Apache Curator 4.2.0 and ZooKeeper 3.5.5

- Add a static initializer for the unit tests using ZooKeeper to enable
the four-letter-words diagnostic telnet commands. (this is an interface
that become disabled by default, so to keep the ZooKeeper 3.4.x behavior
we enabled it for the tests)
- Also fix ZKFailoverController to look for relevant fail-over ActiveAttempt
records. The new ZooKeeper seems to respond quicker during the fail-over
tests than the ZooKeeper, so we made sure to catch all the relevant records
by adding a new parameter to ZKFailoverontroller.waitForActiveAttempt().

Co-authored-by: Norbert Kalmár <nkalmar@cloudera.com>
This commit is contained in:
Mate Szalay-Beko 2019-10-18 22:26:20 +02:00 committed by Wei-Chiu Chuang
parent 3d41f33018
commit 6d92aa7c30
11 changed files with 159 additions and 54 deletions

View File

@ -15,7 +15,6 @@
import com.google.common.annotations.VisibleForTesting;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.security.SecureRandom;
import java.util.Collections;
import java.util.HashMap;
@ -36,7 +35,7 @@
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs.Perms;
import org.apache.zookeeper.client.ZooKeeperSaslClient;
import org.apache.zookeeper.client.ZKClientConfig;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Id;
import org.apache.zookeeper.data.Stat;
@ -368,7 +367,7 @@ protected CuratorFramework createCuratorClient(Properties config)
LOG.info("Connecting to ZooKeeper with SASL/Kerberos"
+ "and using 'sasl' ACLs");
String principal = setJaasConfiguration(config);
System.setProperty(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY,
System.setProperty(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY,
JAAS_LOGIN_ENTRY_NAME);
System.setProperty("zookeeper.authProvider.1",
"org.apache.zookeeper.server.auth.SASLAuthenticationProvider");

View File

@ -442,14 +442,16 @@ private void recordActiveAttempt(
* </ul>
*
* @param timeoutMillis number of millis to wait
* @param onlyAfterNanoTime accept attempt records only after a given
* timestamp. Use this parameter to ignore the old attempt records from a
* previous fail-over attempt.
* @return the published record, or null if the timeout elapses or the
* service becomes unhealthy
* @throws InterruptedException if the thread is interrupted.
*/
private ActiveAttemptRecord waitForActiveAttempt(int timeoutMillis)
throws InterruptedException {
long st = System.nanoTime();
long waitUntil = st + TimeUnit.NANOSECONDS.convert(
private ActiveAttemptRecord waitForActiveAttempt(int timeoutMillis,
long onlyAfterNanoTime) throws InterruptedException {
long waitUntil = onlyAfterNanoTime + TimeUnit.NANOSECONDS.convert(
timeoutMillis, TimeUnit.MILLISECONDS);
do {
@ -466,7 +468,7 @@ private ActiveAttemptRecord waitForActiveAttempt(int timeoutMillis)
synchronized (activeAttemptRecordLock) {
if ((lastActiveAttemptRecord != null &&
lastActiveAttemptRecord.nanoTime >= st)) {
lastActiveAttemptRecord.nanoTime >= onlyAfterNanoTime)) {
return lastActiveAttemptRecord;
}
// Only wait 1sec so that we periodically recheck the health state
@ -660,6 +662,7 @@ private void doGracefulFailover()
List<ZKFCProtocol> otherZkfcs = new ArrayList<ZKFCProtocol>(otherNodes.size());
// Phase 3: ask the other nodes to yield from the election.
long st = System.nanoTime();
HAServiceTarget activeNode = null;
for (HAServiceTarget remote : otherNodes) {
// same location, same node - may not always be == equality
@ -678,7 +681,7 @@ private void doGracefulFailover()
// Phase 4: wait for the normal election to make the local node
// active.
ActiveAttemptRecord attempt = waitForActiveAttempt(timeout + 60000);
ActiveAttemptRecord attempt = waitForActiveAttempt(timeout + 60000, st);
if (attempt == null) {
// We didn't even make an attempt to become active.

View File

@ -59,7 +59,7 @@
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.ZooDefs.Perms;
import org.apache.zookeeper.client.ZooKeeperSaslClient;
import org.apache.zookeeper.client.ZKClientConfig;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Id;
import org.slf4j.Logger;
@ -173,8 +173,8 @@ public ZKDelegationTokenSecretManager(Configuration conf) {
LOG.info("Connecting to ZooKeeper with SASL/Kerberos"
+ "and using 'sasl' ACLs");
String principal = setJaasConfiguration(conf);
System.setProperty(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY,
JAAS_LOGIN_ENTRY_NAME);
System.setProperty(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY,
JAAS_LOGIN_ENTRY_NAME);
System.setProperty("zookeeper.authProvider.1",
"org.apache.zookeeper.server.auth.SASLAuthenticationProvider");
aclProvider = new SASLOwnerACLProvider(principal);

View File

@ -20,13 +20,13 @@
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.apache.curator.framework.AuthInfo;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.framework.api.transaction.CuratorTransaction;
import org.apache.curator.framework.api.transaction.CuratorTransactionFinal;
import org.apache.curator.framework.api.transaction.CuratorOp;
import org.apache.curator.retry.RetryNTimes;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
@ -387,43 +387,45 @@ public SafeTransaction createTransaction(List<ACL> fencingACL,
/**
* Use curator transactions to ensure zk-operations are performed in an all
* or nothing fashion. This is equivalent to using ZooKeeper#multi.
*
* TODO (YARN-3774): Curator 3.0 introduces CuratorOp similar to Op. We ll
* have to rewrite this inner class when we adopt that.
*/
public class SafeTransaction {
private CuratorTransactionFinal transactionFinal;
private String fencingNodePath;
private List<CuratorOp> curatorOperations = new LinkedList<>();
SafeTransaction(List<ACL> fencingACL, String fencingNodePath)
throws Exception {
this.fencingNodePath = fencingNodePath;
CuratorTransaction transaction = curator.inTransaction();
transactionFinal = transaction.create()
.withMode(CreateMode.PERSISTENT).withACL(fencingACL)
.forPath(fencingNodePath, new byte[0]).and();
curatorOperations.add(curator.transactionOp().create()
.withMode(CreateMode.PERSISTENT)
.withACL(fencingACL)
.forPath(fencingNodePath, new byte[0]));
}
public void commit() throws Exception {
transactionFinal = transactionFinal.delete()
.forPath(fencingNodePath).and();
transactionFinal.commit();
curatorOperations.add(curator.transactionOp().delete()
.forPath(fencingNodePath));
curator.transaction().forOperations(curatorOperations);
curatorOperations.clear();
}
public void create(String path, byte[] data, List<ACL> acl, CreateMode mode)
throws Exception {
transactionFinal = transactionFinal.create()
.withMode(mode).withACL(acl).forPath(path, data).and();
curatorOperations.add(curator.transactionOp().create()
.withMode(mode)
.withACL(acl)
.forPath(path, data));
}
public void delete(String path) throws Exception {
transactionFinal = transactionFinal.delete().forPath(path).and();
curatorOperations.add(curator.transactionOp().delete()
.forPath(path));
}
public void setData(String path, byte[] data, int version)
throws Exception {
transactionFinal = transactionFinal.setData()
.withVersion(version).forPath(path, data).and();
curatorOperations.add(curator.transactionOp().setData()
.withVersion(version)
.forPath(path, data));
}
}
}

View File

@ -65,6 +65,15 @@ public abstract class ClientBaseWithFixes extends ZKTestCase {
public static int CONNECTION_TIMEOUT = 30000;
static final File BASETEST = GenericTestUtils.getTestDir();
static {
// The 4-letter-words commands are simple diagnostics telnet commands in
// ZooKeeper. Since ZooKeeper 3.5, these are disabled by default due to
// security concerns: https://issues.apache.org/jira/browse/ZOOKEEPER-2693
// We are enabling them for the tests here, as some tests in hadoop or in
// other projects might still use them
System.setProperty("zookeeper.4lw.commands.whitelist", "*");
}
protected final String hostPort = initHostPort();
protected int maxCnxns = 0;
protected ServerCnxnFactory serverFactory = null;

View File

@ -32,6 +32,7 @@
import org.apache.hadoop.util.ZKUtil;
import org.apache.zookeeper.Environment;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.client.ZKClientConfig;
import org.apache.zookeeper.client.ZooKeeperSaslClient;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Id;
@ -769,19 +770,19 @@ public void applySecurityEnvironment(CuratorFrameworkFactory.Builder
JaasConfiguration jconf =
new JaasConfiguration(jaasClientEntry, principal, keytab);
javax.security.auth.login.Configuration.setConfiguration(jconf);
setSystemPropertyIfUnset(ZooKeeperSaslClient.ENABLE_CLIENT_SASL_KEY,
"true");
setSystemPropertyIfUnset(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY,
jaasClientEntry);
setSystemPropertyIfUnset(ZKClientConfig.ENABLE_CLIENT_SASL_KEY,
"true");
setSystemPropertyIfUnset(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY,
jaasClientEntry);
} else {
// in this case, jaas config is specified so we will not change it
LOG.info("Using existing ZK sasl configuration: " +
"jaasClientEntry = " + System.getProperty(
ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY, "Client") +
", sasl client = " + System.getProperty(
ZooKeeperSaslClient.ENABLE_CLIENT_SASL_KEY,
ZooKeeperSaslClient.ENABLE_CLIENT_SASL_DEFAULT) +
", jaas = " + existingJaasConf);
"jaasClientEntry = " + System.getProperty(
ZKClientConfig.LOGIN_CONTEXT_NAME_KEY, "Client") +
", sasl client = " + System.getProperty(
ZKClientConfig.ENABLE_CLIENT_SASL_KEY,
ZKClientConfig.ENABLE_CLIENT_SASL_DEFAULT) +
", jaas = " + existingJaasConf);
}
break;

View File

@ -18,7 +18,7 @@
package org.apache.hadoop.registry.client.impl.zk;
import org.apache.zookeeper.client.ZooKeeperSaslClient;
import org.apache.zookeeper.client.ZKClientConfig;
import org.apache.zookeeper.server.ZooKeeperSaslServer;
/**
@ -62,10 +62,10 @@ public interface ZookeeperConfigOptions {
*
* <p>
* Default value is derived from
* {@link ZooKeeperSaslClient#LOGIN_CONTEXT_NAME_KEY}
* {@link ZKClientConfig#LOGIN_CONTEXT_NAME_KEY}
*/
String PROP_ZK_SASL_CLIENT_CONTEXT =
ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY;
ZKClientConfig.LOGIN_CONTEXT_NAME_KEY;
/**
* The SASL client username: {@value}.

View File

@ -42,6 +42,7 @@
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.net.UnknownHostException;
/**
@ -121,7 +122,7 @@ public InetSocketAddress getConnectionAddress() {
* @throws UnknownHostException if the server cannot resolve the host
*/
private InetSocketAddress getAddress(int port) throws UnknownHostException {
return new InetSocketAddress(host, port < 0 ? 0 : port);
return new InetSocketAddress(host, port <= 0 ? getRandomAvailablePort() : port);
}
/**
@ -227,10 +228,8 @@ protected void serviceStart() throws Exception {
setupSecurity();
ZooKeeperServer zkServer = new ZooKeeperServer();
FileTxnSnapLog ftxn = new FileTxnSnapLog(dataDir, dataDir);
zkServer.setTxnLogFactory(ftxn);
zkServer.setTickTime(tickTime);
ZooKeeperServer zkServer = new ZooKeeperServer(ftxn, tickTime);
LOG.info("Starting Local Zookeeper service");
factory = ServerCnxnFactory.createFactory();
@ -245,7 +244,7 @@ protected void serviceStart() throws Exception {
PrintWriter pw = new PrintWriter(sw);
zkServer.dumpConf(pw);
pw.flush();
LOG.debug(sw.toString());
LOG.debug("ZooKeeper config:\n" + sw.toString());
}
binding = new BindingInformation();
binding.ensembleProvider = new FixedEnsembleProvider(connectString);
@ -279,4 +278,20 @@ public BindingInformation supplyBindingInformation() {
"Service is not started: binding information undefined");
return binding;
}
/**
* Returns with a random open port can be used to set as server port for ZooKeeper.
* @return a random open port or 0 (in case of error)
*/
private int getRandomAvailablePort() {
port = 0;
try {
final ServerSocket s = new ServerSocket(0);
port = s.getLocalPort();
s.close();
} catch (IOException e) {
LOG.warn("ERROR during selecting random port for ZooKeeper server to bind." , e);
}
return port;
}
}

View File

@ -87,8 +87,8 @@
<protobuf.version>3.7.1</protobuf.version>
<protoc.path>${env.HADOOP_PROTOC_PATH}</protoc.path>
<zookeeper.version>3.4.13</zookeeper.version>
<curator.version>2.13.0</curator.version>
<zookeeper.version>3.5.6</zookeeper.version>
<curator.version>4.2.0</curator.version>
<findbugs.version>3.0.0</findbugs.version>
<spotbugs.version>3.1.0-RC1</spotbugs.version>
<dnsjava.version>2.1.7</dnsjava.version>
@ -1204,6 +1204,46 @@
<groupId>jline</groupId>
<artifactId>jline</artifactId>
</exclusion>
<exclusion>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-handler</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty-transport-native-epoll</artifactId>
</exclusion>
<exclusion>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.kerby</groupId>
<artifactId>kerb-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.kerby</groupId>
<artifactId>kerb-simplekdc</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.kerby</groupId>
<artifactId>kerby-config</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
@ -1340,6 +1380,20 @@
<groupId>org.apache.curator</groupId>
<artifactId>curator-client</artifactId>
<version>${curator.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
@ -1350,6 +1404,16 @@
<groupId>org.apache.curator</groupId>
<artifactId>curator-test</artifactId>
<version>${curator.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.bouncycastle</groupId>

View File

@ -26,6 +26,7 @@
import java.net.InetSocketAddress;
import java.net.Socket;
import org.apache.hadoop.net.ServerSocketUtil;
import org.junit.Assert;
import org.apache.hadoop.yarn.lib.ZKClient;
@ -39,11 +40,14 @@
public class TestZKClient {
public static int CONNECTION_TIMEOUT = 30000;
private static int CONNECTION_TIMEOUT = 30000;
private static int DEFAULT_PORT = 20384;
static final File BASETEST =
new File(System.getProperty("build.test.dir", "target/zookeeper-build"));
protected String hostPort = "127.0.0.1:2000";
protected String hostPort = "127.0.0.1:" + getOpenPort();
protected int maxCnxns = 0;
protected NIOServerCnxnFactory factory = null;
protected ZooKeeperServer zks;
@ -140,6 +144,7 @@ public static File createTmpDir(File parentDir) throws IOException {
@Before
public void setUp() throws IOException, InterruptedException {
System.setProperty("zookeeper.preAllocSize", "100");
System.setProperty("zookeeper.4lw.commands.whitelist", "*");
FileTxnLog.setPreallocSize(100 * 1024);
if (!BASETEST.exists()) {
BASETEST.mkdirs();
@ -186,4 +191,12 @@ private void test(String testClient) throws Exception {
client.unregisterService("/nodemanager");
}
private int getOpenPort() {
try {
return ServerSocketUtil.getPorts(1)[0];
} catch (IOException e) {
return DEFAULT_PORT;
}
}
}

View File

@ -169,8 +169,7 @@ public void testExpireCurrentZKSession() throws Exception{
service.getCuratorClient().getZookeeperClient();
// this will expire current curator client session. curator will re-establish
// the session. RM will first relinquish leadership and re-acquire leadership
KillSession
.kill(client.getZooKeeper(), client.getCurrentConnectionString());
KillSession.kill(client.getZooKeeper());
waitFor(rm1, HAServiceState.ACTIVE);
}