From 5abf051249d485313dfffc6aeff6f81c0da1f623 Mon Sep 17 00:00:00 2001 From: Xiaoyu Yao Date: Wed, 2 Mar 2016 09:23:17 -0800 Subject: [PATCH] HDFS-9887. WebHdfs socket timeouts should be configurable. Contributed by Austin Donnelly and Chris Douglas. --- .../hdfs/client/HdfsClientConfigKeys.java | 5 +++ .../hadoop/hdfs/web/URLConnectionFactory.java | 30 ++++++++++++--- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 5 ++- .../src/main/resources/hdfs-default.xml | 27 ++++++++++++++ .../hadoop-hdfs/src/site/markdown/WebHDFS.md | 2 + .../hadoop/hdfs/web/TestWebHdfsTimeouts.java | 37 ++++++++++++++++++- 6 files changed, 98 insertions(+), 8 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java index ce0f3e4cfa..f036c566d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java @@ -38,6 +38,11 @@ public interface HdfsClientConfigKeys { String DFS_WEBHDFS_ACL_PERMISSION_PATTERN_DEFAULT = "^(default:)?(user|group|mask|other):[[A-Za-z_][A-Za-z0-9._-]]*:([rwx-]{3})?(,(default:)?(user|group|mask|other):[[A-Za-z_][A-Za-z0-9._-]]*:([rwx-]{3})?)*$"; + String DFS_WEBHDFS_SOCKET_CONNECT_TIMEOUT_KEY = + "dfs.webhdfs.socket.connect-timeout"; + String DFS_WEBHDFS_SOCKET_READ_TIMEOUT_KEY = + "dfs.webhdfs.socket.read-timeout"; + String DFS_WEBHDFS_OAUTH_ENABLED_KEY = "dfs.webhdfs.oauth2.enabled"; boolean DFS_WEBHDFS_OAUTH_ENABLED_DEFAULT = false; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/URLConnectionFactory.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/URLConnectionFactory.java index 8810ac4875..975f72e8f6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/URLConnectionFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/URLConnectionFactory.java @@ -23,6 +23,7 @@ import java.net.URL; import java.net.URLConnection; import java.security.GeneralSecurityException; +import java.util.concurrent.TimeUnit; import javax.net.ssl.HostnameVerifier; import javax.net.ssl.HttpsURLConnection; @@ -31,6 +32,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.web.oauth2.OAuth2ConnectionConfigurator; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authentication.client.AuthenticatedURL; @@ -62,7 +64,9 @@ public class URLConnectionFactory { @Override public HttpURLConnection configure(HttpURLConnection conn) throws IOException { - URLConnectionFactory.setTimeouts(conn, DEFAULT_SOCKET_TIMEOUT); + URLConnectionFactory.setTimeouts(conn, + DEFAULT_SOCKET_TIMEOUT, + DEFAULT_SOCKET_TIMEOUT); return conn; } }; @@ -128,17 +132,29 @@ public static URLConnectionFactory newOAuth2URLConnectionFactory( * Create a new ConnectionConfigurator for SSL connections */ private static ConnectionConfigurator newSslConnConfigurator( - final int timeout, Configuration conf) + final int defaultTimeout, Configuration conf) throws IOException, GeneralSecurityException { final SSLFactory factory; final SSLSocketFactory sf; final HostnameVerifier hv; + final int connectTimeout; + final int readTimeout; factory = new SSLFactory(SSLFactory.Mode.CLIENT, conf); factory.init(); sf = factory.createSSLSocketFactory(); hv = factory.getHostnameVerifier(); + connectTimeout = (int) conf.getTimeDuration( + HdfsClientConfigKeys.DFS_WEBHDFS_SOCKET_CONNECT_TIMEOUT_KEY, + defaultTimeout, + TimeUnit.MILLISECONDS); + + readTimeout = (int) conf.getTimeDuration( + HdfsClientConfigKeys.DFS_WEBHDFS_SOCKET_READ_TIMEOUT_KEY, + defaultTimeout, + TimeUnit.MILLISECONDS); + return new ConnectionConfigurator() { @Override public HttpURLConnection configure(HttpURLConnection conn) @@ -148,7 +164,7 @@ public HttpURLConnection configure(HttpURLConnection conn) c.setSSLSocketFactory(sf); c.setHostnameVerifier(hv); } - URLConnectionFactory.setTimeouts(conn, timeout); + URLConnectionFactory.setTimeouts(conn, connectTimeout, readTimeout); return conn; } }; @@ -208,8 +224,10 @@ public URLConnection openConnection(URL url, boolean isSpnego) * @param socketTimeout * the connection and read timeout of the connection. */ - private static void setTimeouts(URLConnection connection, int socketTimeout) { - connection.setConnectTimeout(socketTimeout); - connection.setReadTimeout(socketTimeout); + private static void setTimeouts(URLConnection connection, + int connectTimeout, + int readTimeout) { + connection.setConnectTimeout(connectTimeout); + connection.setReadTimeout(readTimeout); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 8be05bff22..956294908b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1993,9 +1993,12 @@ Release 2.8.0 - UNRELEASED HDFS-9843. Document distcp options required for copying between encrypted locations. (Xiaoyu Yao via cnauroth) - HDFS-9831.Document webhdfs retry configuration keys introduced by + HDFS-9831. Document webhdfs retry configuration keys introduced by HDFS-5219/HDFS-5122. (Xiaobing Zhou via xyao) + HDFS-9887. WebHdfs socket timeouts should be configurable. + (Austin Donnelly and Chris Douglas via xyao) + OPTIMIZATIONS HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index b4fb2e0806..ea25a91003 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -2317,6 +2317,33 @@ + + dfs.webhdfs.socket.connect-timeout + 60s + + Socket timeout for connecting to WebHDFS servers. This prevents a + WebHDFS client from hanging if the server hostname is + misconfigured, or the server does not response before the timeout + expires. Value is followed by a unit specifier: ns, us, ms, s, m, + h, d for nanoseconds, microseconds, milliseconds, seconds, + minutes, hours, days respectively. Values should provide units, + but milliseconds are assumed. + + + + + dfs.webhdfs.socket.read-timeout + 60s + + Socket timeout for reading data from WebHDFS servers. This + prevents a WebHDFS client from hanging if the server stops sending + data. Value is followed by a unit specifier: ns, us, ms, s, m, h, + d for nanoseconds, microseconds, milliseconds, seconds, minutes, + hours, days respectively. Values should provide units, + but milliseconds are assumed. + + + dfs.client.context default diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md index 79e79b2edc..c7194825d4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/WebHDFS.md @@ -199,6 +199,8 @@ Below are the HDFS configuration options for WebHDFS. |:---- |:---- | | `dfs.web.authentication.kerberos.principal` | The HTTP Kerberos principal used by Hadoop-Auth in the HTTP endpoint. The HTTP Kerberos principal MUST start with 'HTTP/' per Kerberos HTTP SPNEGO specification. A value of "\*" will use all HTTP principals found in the keytab. | | `dfs.web.authentication.kerberos.keytab ` | The Kerberos keytab file with the credentials for the HTTP Kerberos principal used by Hadoop-Auth in the HTTP endpoint. | +| `dfs.webhdfs.socket.connect-timeout` | How long to wait for a connection to be established before failing. Specified as a time duration, ie numerical value followed by a units symbol, eg 2m for two minutes. Defaults to 60s. | +| `dfs.webhdfs.socket.read-timeout` | How long to wait for data to arrive before failing. Defaults to 60s. | Authentication -------------- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsTimeouts.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsTimeouts.java index 3a87d422a6..664e32df1b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsTimeouts.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsTimeouts.java @@ -33,18 +33,25 @@ import java.net.SocketTimeoutException; import java.nio.channels.SocketChannel; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.authentication.client.ConnectionConfigurator; import org.junit.After; import org.junit.Before; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameter; +import org.junit.runners.Parameterized.Parameters; import org.junit.Test; /** @@ -54,6 +61,7 @@ * bogus server on the namenode HTTP port, which is rigged to not accept new * connections or to accept connections but not send responses. */ +@RunWith(Parameterized.class) public class TestWebHdfsTimeouts { private static final Log LOG = LogFactory.getLog(TestWebHdfsTimeouts.class); @@ -77,14 +85,41 @@ public HttpURLConnection configure(HttpURLConnection conn) throws IOException { } }); + public enum TimeoutSource { ConnectionFactory, Configuration }; + + /** + * Run all tests twice: once with the timeouts set by the + * connection factory, and again with the timeouts set by + * configuration options. + */ + @Parameters(name = "timeoutSource={0}") + public static Collection data() { + return Arrays.asList(new Object[][] { + { TimeoutSource.ConnectionFactory }, + { TimeoutSource.Configuration } + }); + } + + @Parameter + public TimeoutSource timeoutSource; + @Before public void setUp() throws Exception { Configuration conf = WebHdfsTestUtil.createConf(); serverSocket = new ServerSocket(0, CONNECTION_BACKLOG); nnHttpAddress = new InetSocketAddress("localhost", serverSocket.getLocalPort()); conf.set(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY, "localhost:" + serverSocket.getLocalPort()); + if (timeoutSource == TimeoutSource.Configuration) { + String v = Integer.toString(SHORT_SOCKET_TIMEOUT) + "ms"; + conf.set(HdfsClientConfigKeys.DFS_WEBHDFS_SOCKET_CONNECT_TIMEOUT_KEY, v); + conf.set(HdfsClientConfigKeys.DFS_WEBHDFS_SOCKET_READ_TIMEOUT_KEY, v); + } + fs = WebHdfsTestUtil.getWebHdfsFileSystem(conf, WebHdfsConstants.WEBHDFS_SCHEME); - fs.connectionFactory = connectionFactory; + if (timeoutSource == TimeoutSource.ConnectionFactory) { + fs.connectionFactory = connectionFactory; + } + clients = new ArrayList(); serverThread = null; }