From 6d92aa7c30439d78deb68cc3186a67557544681f Mon Sep 17 00:00:00 2001 From: Mate Szalay-Beko Date: Fri, 18 Oct 2019 22:26:20 +0200 Subject: [PATCH] =?UTF-8?q?HADOOP-16579.=20Upgrade=20to=20Curator=204.2.0?= =?UTF-8?q?=20and=20ZooKeeper=203.5.5=20(#1656).=20Contributed=20by=20Norb?= =?UTF-8?q?ert=20Kalm=C3=A1r,=20Mate=20Szalay-Beko?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * HADOOP-16579 - Upgrade to Apache Curator 4.2.0 and ZooKeeper 3.5.5 - Add a static initializer for the unit tests using ZooKeeper to enable the four-letter-words diagnostic telnet commands. (this is an interface that become disabled by default, so to keep the ZooKeeper 3.4.x behavior we enabled it for the tests) - Also fix ZKFailoverController to look for relevant fail-over ActiveAttempt records. The new ZooKeeper seems to respond quicker during the fail-over tests than the ZooKeeper, so we made sure to catch all the relevant records by adding a new parameter to ZKFailoverontroller.waitForActiveAttempt(). Co-authored-by: Norbert Kalmár --- .../util/ZKSignerSecretProvider.java | 5 +- .../hadoop/ha/ZKFailoverController.java | 15 ++-- .../ZKDelegationTokenSecretManager.java | 6 +- .../hadoop/util/curator/ZKCuratorManager.java | 38 ++++++----- .../apache/hadoop/ha/ClientBaseWithFixes.java | 9 +++ .../client/impl/zk/RegistrySecurity.java | 21 +++--- .../impl/zk/ZookeeperConfigOptions.java | 6 +- .../services/MicroZookeeperService.java | 25 +++++-- hadoop-project/pom.xml | 68 ++++++++++++++++++- .../apache/hadoop/yarn/lib/TestZKClient.java | 17 ++++- .../TestLeaderElectorService.java | 3 +- 11 files changed, 159 insertions(+), 54 deletions(-) diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java index a7fc76f6a1..f0c350ed95 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java @@ -15,7 +15,6 @@ import com.google.common.annotations.VisibleForTesting; import java.nio.ByteBuffer; -import java.nio.charset.Charset; import java.security.SecureRandom; import java.util.Collections; import java.util.HashMap; @@ -36,7 +35,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.ZooDefs.Perms; -import org.apache.zookeeper.client.ZooKeeperSaslClient; +import org.apache.zookeeper.client.ZKClientConfig; import org.apache.zookeeper.data.ACL; import org.apache.zookeeper.data.Id; import org.apache.zookeeper.data.Stat; @@ -368,7 +367,7 @@ protected CuratorFramework createCuratorClient(Properties config) LOG.info("Connecting to ZooKeeper with SASL/Kerberos" + "and using 'sasl' ACLs"); String principal = setJaasConfiguration(config); - System.setProperty(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY, + System.setProperty(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY, JAAS_LOGIN_ENTRY_NAME); System.setProperty("zookeeper.authProvider.1", "org.apache.zookeeper.server.auth.SASLAuthenticationProvider"); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java index d568376d30..ee4ca1a608 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java @@ -442,14 +442,16 @@ private void recordActiveAttempt( * * * @param timeoutMillis number of millis to wait + * @param onlyAfterNanoTime accept attempt records only after a given + * timestamp. Use this parameter to ignore the old attempt records from a + * previous fail-over attempt. * @return the published record, or null if the timeout elapses or the * service becomes unhealthy * @throws InterruptedException if the thread is interrupted. */ - private ActiveAttemptRecord waitForActiveAttempt(int timeoutMillis) - throws InterruptedException { - long st = System.nanoTime(); - long waitUntil = st + TimeUnit.NANOSECONDS.convert( + private ActiveAttemptRecord waitForActiveAttempt(int timeoutMillis, + long onlyAfterNanoTime) throws InterruptedException { + long waitUntil = onlyAfterNanoTime + TimeUnit.NANOSECONDS.convert( timeoutMillis, TimeUnit.MILLISECONDS); do { @@ -466,7 +468,7 @@ private ActiveAttemptRecord waitForActiveAttempt(int timeoutMillis) synchronized (activeAttemptRecordLock) { if ((lastActiveAttemptRecord != null && - lastActiveAttemptRecord.nanoTime >= st)) { + lastActiveAttemptRecord.nanoTime >= onlyAfterNanoTime)) { return lastActiveAttemptRecord; } // Only wait 1sec so that we periodically recheck the health state @@ -660,6 +662,7 @@ private void doGracefulFailover() List otherZkfcs = new ArrayList(otherNodes.size()); // Phase 3: ask the other nodes to yield from the election. + long st = System.nanoTime(); HAServiceTarget activeNode = null; for (HAServiceTarget remote : otherNodes) { // same location, same node - may not always be == equality @@ -678,7 +681,7 @@ private void doGracefulFailover() // Phase 4: wait for the normal election to make the local node // active. - ActiveAttemptRecord attempt = waitForActiveAttempt(timeout + 60000); + ActiveAttemptRecord attempt = waitForActiveAttempt(timeout + 60000, st); if (attempt == null) { // We didn't even make an attempt to become active. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java index cce124ee06..f61590c28e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java @@ -59,7 +59,7 @@ import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.NoNodeException; import org.apache.zookeeper.ZooDefs.Perms; -import org.apache.zookeeper.client.ZooKeeperSaslClient; +import org.apache.zookeeper.client.ZKClientConfig; import org.apache.zookeeper.data.ACL; import org.apache.zookeeper.data.Id; import org.slf4j.Logger; @@ -173,8 +173,8 @@ public ZKDelegationTokenSecretManager(Configuration conf) { LOG.info("Connecting to ZooKeeper with SASL/Kerberos" + "and using 'sasl' ACLs"); String principal = setJaasConfiguration(conf); - System.setProperty(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY, - JAAS_LOGIN_ENTRY_NAME); + System.setProperty(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY, + JAAS_LOGIN_ENTRY_NAME); System.setProperty("zookeeper.authProvider.1", "org.apache.zookeeper.server.auth.SASLAuthenticationProvider"); aclProvider = new SASLOwnerACLProvider(principal); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java index d164138a39..36dade27dd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java @@ -20,13 +20,13 @@ import java.io.IOException; import java.nio.charset.Charset; import java.util.ArrayList; +import java.util.LinkedList; import java.util.List; import org.apache.curator.framework.AuthInfo; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.CuratorFrameworkFactory; -import org.apache.curator.framework.api.transaction.CuratorTransaction; -import org.apache.curator.framework.api.transaction.CuratorTransactionFinal; +import org.apache.curator.framework.api.transaction.CuratorOp; import org.apache.curator.retry.RetryNTimes; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -387,43 +387,45 @@ public SafeTransaction createTransaction(List fencingACL, /** * Use curator transactions to ensure zk-operations are performed in an all * or nothing fashion. This is equivalent to using ZooKeeper#multi. - * - * TODO (YARN-3774): Curator 3.0 introduces CuratorOp similar to Op. We ll - * have to rewrite this inner class when we adopt that. */ public class SafeTransaction { - private CuratorTransactionFinal transactionFinal; private String fencingNodePath; + private List curatorOperations = new LinkedList<>(); SafeTransaction(List fencingACL, String fencingNodePath) throws Exception { this.fencingNodePath = fencingNodePath; - CuratorTransaction transaction = curator.inTransaction(); - transactionFinal = transaction.create() - .withMode(CreateMode.PERSISTENT).withACL(fencingACL) - .forPath(fencingNodePath, new byte[0]).and(); + curatorOperations.add(curator.transactionOp().create() + .withMode(CreateMode.PERSISTENT) + .withACL(fencingACL) + .forPath(fencingNodePath, new byte[0])); } public void commit() throws Exception { - transactionFinal = transactionFinal.delete() - .forPath(fencingNodePath).and(); - transactionFinal.commit(); + curatorOperations.add(curator.transactionOp().delete() + .forPath(fencingNodePath)); + curator.transaction().forOperations(curatorOperations); + curatorOperations.clear(); } public void create(String path, byte[] data, List acl, CreateMode mode) throws Exception { - transactionFinal = transactionFinal.create() - .withMode(mode).withACL(acl).forPath(path, data).and(); + curatorOperations.add(curator.transactionOp().create() + .withMode(mode) + .withACL(acl) + .forPath(path, data)); } public void delete(String path) throws Exception { - transactionFinal = transactionFinal.delete().forPath(path).and(); + curatorOperations.add(curator.transactionOp().delete() + .forPath(path)); } public void setData(String path, byte[] data, int version) throws Exception { - transactionFinal = transactionFinal.setData() - .withVersion(version).forPath(path, data).and(); + curatorOperations.add(curator.transactionOp().setData() + .withVersion(version) + .forPath(path, data)); } } } \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/ClientBaseWithFixes.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/ClientBaseWithFixes.java index 7396694ce9..be6181157c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/ClientBaseWithFixes.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/ClientBaseWithFixes.java @@ -65,6 +65,15 @@ public abstract class ClientBaseWithFixes extends ZKTestCase { public static int CONNECTION_TIMEOUT = 30000; static final File BASETEST = GenericTestUtils.getTestDir(); + static { + // The 4-letter-words commands are simple diagnostics telnet commands in + // ZooKeeper. Since ZooKeeper 3.5, these are disabled by default due to + // security concerns: https://issues.apache.org/jira/browse/ZOOKEEPER-2693 + // We are enabling them for the tests here, as some tests in hadoop or in + // other projects might still use them + System.setProperty("zookeeper.4lw.commands.whitelist", "*"); + } + protected final String hostPort = initHostPort(); protected int maxCnxns = 0; protected ServerCnxnFactory serverFactory = null; diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/RegistrySecurity.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/RegistrySecurity.java index 175f6bb7cb..c3cb021fb5 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/RegistrySecurity.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/RegistrySecurity.java @@ -32,6 +32,7 @@ import org.apache.hadoop.util.ZKUtil; import org.apache.zookeeper.Environment; import org.apache.zookeeper.ZooDefs; +import org.apache.zookeeper.client.ZKClientConfig; import org.apache.zookeeper.client.ZooKeeperSaslClient; import org.apache.zookeeper.data.ACL; import org.apache.zookeeper.data.Id; @@ -769,19 +770,19 @@ public void applySecurityEnvironment(CuratorFrameworkFactory.Builder JaasConfiguration jconf = new JaasConfiguration(jaasClientEntry, principal, keytab); javax.security.auth.login.Configuration.setConfiguration(jconf); - setSystemPropertyIfUnset(ZooKeeperSaslClient.ENABLE_CLIENT_SASL_KEY, - "true"); - setSystemPropertyIfUnset(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY, - jaasClientEntry); + setSystemPropertyIfUnset(ZKClientConfig.ENABLE_CLIENT_SASL_KEY, + "true"); + setSystemPropertyIfUnset(ZKClientConfig.LOGIN_CONTEXT_NAME_KEY, + jaasClientEntry); } else { // in this case, jaas config is specified so we will not change it LOG.info("Using existing ZK sasl configuration: " + - "jaasClientEntry = " + System.getProperty( - ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY, "Client") + - ", sasl client = " + System.getProperty( - ZooKeeperSaslClient.ENABLE_CLIENT_SASL_KEY, - ZooKeeperSaslClient.ENABLE_CLIENT_SASL_DEFAULT) + - ", jaas = " + existingJaasConf); + "jaasClientEntry = " + System.getProperty( + ZKClientConfig.LOGIN_CONTEXT_NAME_KEY, "Client") + + ", sasl client = " + System.getProperty( + ZKClientConfig.ENABLE_CLIENT_SASL_KEY, + ZKClientConfig.ENABLE_CLIENT_SASL_DEFAULT) + + ", jaas = " + existingJaasConf); } break; diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/ZookeeperConfigOptions.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/ZookeeperConfigOptions.java index edcf0859fc..115af3237a 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/ZookeeperConfigOptions.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/client/impl/zk/ZookeeperConfigOptions.java @@ -18,7 +18,7 @@ package org.apache.hadoop.registry.client.impl.zk; -import org.apache.zookeeper.client.ZooKeeperSaslClient; +import org.apache.zookeeper.client.ZKClientConfig; import org.apache.zookeeper.server.ZooKeeperSaslServer; /** @@ -62,10 +62,10 @@ public interface ZookeeperConfigOptions { * *

* Default value is derived from - * {@link ZooKeeperSaslClient#LOGIN_CONTEXT_NAME_KEY} + * {@link ZKClientConfig#LOGIN_CONTEXT_NAME_KEY} */ String PROP_ZK_SASL_CLIENT_CONTEXT = - ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY; + ZKClientConfig.LOGIN_CONTEXT_NAME_KEY; /** * The SASL client username: {@value}. diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/services/MicroZookeeperService.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/services/MicroZookeeperService.java index b6cf9fc519..a7e2611b3d 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/services/MicroZookeeperService.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/services/MicroZookeeperService.java @@ -42,6 +42,7 @@ import java.io.PrintWriter; import java.io.StringWriter; import java.net.InetSocketAddress; +import java.net.ServerSocket; import java.net.UnknownHostException; /** @@ -121,7 +122,7 @@ public InetSocketAddress getConnectionAddress() { * @throws UnknownHostException if the server cannot resolve the host */ private InetSocketAddress getAddress(int port) throws UnknownHostException { - return new InetSocketAddress(host, port < 0 ? 0 : port); + return new InetSocketAddress(host, port <= 0 ? getRandomAvailablePort() : port); } /** @@ -227,10 +228,8 @@ protected void serviceStart() throws Exception { setupSecurity(); - ZooKeeperServer zkServer = new ZooKeeperServer(); FileTxnSnapLog ftxn = new FileTxnSnapLog(dataDir, dataDir); - zkServer.setTxnLogFactory(ftxn); - zkServer.setTickTime(tickTime); + ZooKeeperServer zkServer = new ZooKeeperServer(ftxn, tickTime); LOG.info("Starting Local Zookeeper service"); factory = ServerCnxnFactory.createFactory(); @@ -245,7 +244,7 @@ protected void serviceStart() throws Exception { PrintWriter pw = new PrintWriter(sw); zkServer.dumpConf(pw); pw.flush(); - LOG.debug(sw.toString()); + LOG.debug("ZooKeeper config:\n" + sw.toString()); } binding = new BindingInformation(); binding.ensembleProvider = new FixedEnsembleProvider(connectString); @@ -279,4 +278,20 @@ public BindingInformation supplyBindingInformation() { "Service is not started: binding information undefined"); return binding; } + + /** + * Returns with a random open port can be used to set as server port for ZooKeeper. + * @return a random open port or 0 (in case of error) + */ + private int getRandomAvailablePort() { + port = 0; + try { + final ServerSocket s = new ServerSocket(0); + port = s.getLocalPort(); + s.close(); + } catch (IOException e) { + LOG.warn("ERROR during selecting random port for ZooKeeper server to bind." , e); + } + return port; + } } diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 803cf7cc65..14f13cdf4d 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -87,8 +87,8 @@ 3.7.1 ${env.HADOOP_PROTOC_PATH} - 3.4.13 - 2.13.0 + 3.5.6 + 4.2.0 3.0.0 3.1.0-RC1 2.1.7 @@ -1204,6 +1204,46 @@ jline jline + + commons-cli + commons-cli + + + io.netty + netty-all + + + io.netty + netty-handler + + + io.netty + netty-transport-native-epoll + + + commons-collections + commons-collections + + + org.apache.kerby + kerb-core + + + org.apache.kerby + kerb-simplekdc + + + org.apache.kerby + kerby-config + + + org.slf4j + slf4j-api + + + org.slf4j + slf4j-log4j12 + @@ -1340,6 +1380,20 @@ org.apache.curator curator-client ${curator.version} + + + org.apache.zookeeper + zookeeper + + + com.google.guava + guava + + + org.slf4j + slf4j-api + + org.apache.curator @@ -1350,6 +1404,16 @@ org.apache.curator curator-test ${curator.version} + + + org.apache.zookeeper + zookeeper + + + com.google.guava + guava + + org.bouncycastle diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/lib/TestZKClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/lib/TestZKClient.java index d22856363d..c2a86fcaa2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/lib/TestZKClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/lib/TestZKClient.java @@ -26,6 +26,7 @@ import java.net.InetSocketAddress; import java.net.Socket; +import org.apache.hadoop.net.ServerSocketUtil; import org.junit.Assert; import org.apache.hadoop.yarn.lib.ZKClient; @@ -39,11 +40,14 @@ public class TestZKClient { - public static int CONNECTION_TIMEOUT = 30000; + private static int CONNECTION_TIMEOUT = 30000; + private static int DEFAULT_PORT = 20384; static final File BASETEST = new File(System.getProperty("build.test.dir", "target/zookeeper-build")); - protected String hostPort = "127.0.0.1:2000"; + protected String hostPort = "127.0.0.1:" + getOpenPort(); + + protected int maxCnxns = 0; protected NIOServerCnxnFactory factory = null; protected ZooKeeperServer zks; @@ -140,6 +144,7 @@ public static File createTmpDir(File parentDir) throws IOException { @Before public void setUp() throws IOException, InterruptedException { System.setProperty("zookeeper.preAllocSize", "100"); + System.setProperty("zookeeper.4lw.commands.whitelist", "*"); FileTxnLog.setPreallocSize(100 * 1024); if (!BASETEST.exists()) { BASETEST.mkdirs(); @@ -186,4 +191,12 @@ private void test(String testClient) throws Exception { client.unregisterService("/nodemanager"); } + private int getOpenPort() { + try { + return ServerSocketUtil.getPorts(1)[0]; + } catch (IOException e) { + return DEFAULT_PORT; + } + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestLeaderElectorService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestLeaderElectorService.java index c2b1eb45b8..4cd1df607f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestLeaderElectorService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestLeaderElectorService.java @@ -169,8 +169,7 @@ public void testExpireCurrentZKSession() throws Exception{ service.getCuratorClient().getZookeeperClient(); // this will expire current curator client session. curator will re-establish // the session. RM will first relinquish leadership and re-acquire leadership - KillSession - .kill(client.getZooKeeper(), client.getCurrentConnectionString()); + KillSession.kill(client.getZooKeeper()); waitFor(rm1, HAServiceState.ACTIVE); }