HDFS Proxy is a proxy server through which a Hadoop client (using HSFTP) or a standard HTTPS client (wget, curl, etc.) can talk to a Hadoop cluster and, more importantly, pull data from it. It puts an access control layer in front of the Hadoop NameNode and extends its functionality to allow Hadoop cross-version data transfer.
There are multiple HDFS clusters, possibly running different Hadoop versions, each holding different data. A client needs to access these data in a standard way without worrying about version compatibility issues.
The NameNode starts an HTTP listener at dfs.namenode.http-address (default port 50070) when it starts up, and this listener provides an HFTP interface for clients. It can also start an HTTPS listener at dfs.namenode.https-address if dfs.https.enable is set to true (by default, dfs.https.enable is not defined), which provides an HSFTP interface for clients.
As shown in the figure above, on the client side the proxy server accepts requests from HSFTP and HTTPS clients. The requests pass through a filter module (containing one or more filters) for access control checks. They then go through a delegation module, whose responsibility is to direct each request to the right client version for accessing the source cluster. After that, the delegated client talks to the source cluster's server through the RPC protocol using servlets.
To realize proxy authentication and access control, we use a servlet filter. The filter module is very flexible: it can be installed or disabled simply by changing the corresponding items in the deployment descriptor file (web.xml). We implemented two filters in the proxy code, ProxyFilter and LdapIpDirFilter. How each filter works is described below.
The SSL-based ProxyFilter provides strong PKI authentication and encryption. The proxy server can create a self-signed CA using OpenSSL and use that CA to sign and issue certificates to clients.
Managing access information through configuration files is a convenient way to start and is easy to set up for a small user group. However, to scale to a large user group and to handle account management operations such as adding, deleting, and changing access, a separate package or a different mechanism such as an LDAP server is needed.
The schema for the entry attributes in the LDAP server should match what is used in the proxy. The schema currently used in the proxy is configurable through hdfsproxy-default.xml, but the attributes should always contain the IP address (default uniqueMember), user ID (default uid), user group (default userClass), and allowable HDFS directories (default documentLocation).
Users can also write their own filters and plug them into the filter chain to provide extended functionality.
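For illustration, the sketch below shows how one of these filters could be declared in a web application's web.xml; the filter name and URL pattern are placeholders rather than the shipped defaults:

    <!-- Illustrative web.xml fragment: installs the SSL-based proxy filter on all paths. -->
    <filter>
      <filter-name>proxyFilter</filter-name>
      <filter-class>org.apache.hadoop.hdfsproxy.ProxyFilter</filter-class>
    </filter>
    <filter-mapping>
      <filter-name>proxyFilter</filter-name>
      <url-pattern>/*</url-pattern>
    </filter-mapping>

Disabling the filter is then just a matter of removing (or commenting out) these two elements from web.xml.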
As shown in the figure, the delegation module contains two parts: a forwarding web application (ROOT.war) and one client web application per supported source-cluster version. All servlets are packaged in WAR files.
Strictly speaking, HDFS Proxy does not by itself solve the HDFS cross-version communication problem. However, by wrapping all the RPC client versions and delegating client requests to the right version of the RPC client, HDFS Proxy functions as if it can talk to multiple source clusters running different Hadoop versions.
Packaging the servlets in WAR files has several advantages. Note that inter-communication between servlets in the forwarding war and those in a specific client-version war can only use built-in data types such as int, String, etc., because such types are loaded first through the common classloader.
Proxy server functionality is implemented using servlets deployed under a servlet container. Specifically, there are three proxy servlets: ProxyListPathsServlet, ProxyFileDataServlet, and ProxyStreamFile. Together, they implement the same H(S)FTP interface as the original ListPathsServlet, FileDataServlet, and StreamFile servlets do on an HDFS cluster. In fact, the proxy servlets are subclasses of the original servlets with minor changes, such as retrieving the client UGI from the proxy server. All three servlets are put into the client war files.
The forwarding proxy, which is implemented by ProxyForwardServlet, is put in a separate web application (ROOT.war). All client requests should be sent to the forwarding proxy. The forwarding proxy does not implement any functionality by itself; instead, it simply forwards client requests to the right web applications with the right servlet paths.
Forwarding servlets forward requests to servlets in the right web applications through servlet cross-context communication, enabled by setting crossContext="true" in the servlet container's configuration file.
The proxy server installs a servlet, ProxyFileForward, which is a subclass of ProxyForwardServlet, on the path /file. It exposes a simple HTTPS GET interface (internally delegating the work to the ProxyStreamFile servlet via the forwarding mechanism discussed above). This interface supports standard HTTP clients like curl and wget. HTTPS client requests on the wire should look like https://proxy_address/file/file_path
The delegation module relies on the forwarding WAR being able to identify each request so that it can direct the request to the right HDFS client RPC version. Identifying requests by Domain Name, which can be extracted from the request header, is a straightforward way to do this. Note that a Domain Name can have many aliases through CNAME records. By exploiting this feature, we can create a Domain Name, create many aliases of it, and make these aliases correspond to different client RPC request versions. At the same time, we may need many servers for load balancing. We can make all these servers (with different IP addresses) point to the same Domain Name in a round-robin fashion. By doing this, we get default load balancing if we have multiple proxy servers running in the back-end.
With a Jetty-based installation, only part of the proxy features are supported:

- ProxyFilter installed

The following properties are configured in hdfsproxy-default.xml:

Name | Description
---|---
hdfsproxy.https.address | The SSL port that hdfsproxy listens on.
hdfsproxy.hosts | Location of the hdfsproxy-hosts file.
hdfsproxy.dfs.namenode.address | NameNode address of the HDFS cluster being proxied.
hdfsproxy.https.server.keystore.resource | Location of the resource from which SSL server keystore information will be extracted.
hdfsproxy.user.permissions.file.location | Location of the user permissions file.
hdfsproxy.user.certs.file.location | Location of the user certs file.
hdfsproxy.ugi.cache.ugi.lifetime | The lifetime (in minutes) of a cached UGI.
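For example, a minimal hdfsproxy-default.xml might look like the following; all values are illustrative and should be adjusted for your deployment:

    <configuration>
      <property>
        <name>hdfsproxy.https.address</name>
        <value>0.0.0.0:50479</value> <!-- illustrative SSL port -->
      </property>
      <property>
        <name>hdfsproxy.hosts</name>
        <value>hdfsproxy-hosts</value>
      </property>
      <property>
        <name>hdfsproxy.dfs.namenode.address</name>
        <value>namenode.example.com:50470</value> <!-- example NameNode address -->
      </property>
      <property>
        <name>hdfsproxy.https.server.keystore.resource</name>
        <value>ssl-server.xml</value>
      </property>
      <property>
        <name>hdfsproxy.user.permissions.file.location</name>
        <value>user-permissions.xml</value>
      </property>
      <property>
        <name>hdfsproxy.user.certs.file.location</name>
        <value>user-certs.xml</value>
      </property>
      <property>
        <name>hdfsproxy.ugi.cache.ugi.lifetime</name>
        <value>15</value> <!-- minutes -->
      </property>
    </configuration>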
The ssl.server.* properties supply the proxy's SSL keystore information (in ssl-server.xml):

Name | Description
---|---
ssl.server.truststore.location | Location of the truststore.
ssl.server.truststore.password | Truststore password.
ssl.server.keystore.location | Location of the keystore.
ssl.server.keystore.password | Keystore password.
ssl.server.keystore.keypassword | Key password.
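A corresponding ssl-server.xml sketch, assuming the standard Hadoop configuration format; keystore paths and passwords are placeholders:

    <configuration>
      <property>
        <name>ssl.server.keystore.location</name>
        <value>/etc/hdfsproxy/server.keystore</value>
      </property>
      <property>
        <name>ssl.server.keystore.password</name>
        <value>changeme</value>
      </property>
      <property>
        <name>ssl.server.keystore.keypassword</name>
        <value>changeme</value>
      </property>
      <property>
        <name>ssl.server.truststore.location</name>
        <value>/etc/hdfsproxy/server.truststore</value>
      </property>
      <property>
        <name>ssl.server.truststore.password</name>
        <value>changeme</value>
      </property>
    </configuration>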
Name | Description
---|---
user-certs.xml | This file defines the mappings from a username to the comma-separated list of certificate serial numbers that the user is allowed to use. One mapping per user. Wildcard characters, such as "*" and "?", are not recognized. Any leading or trailing whitespace is stripped/ignored. In order for a user to be able to issue the "clearUgiCache" and "reloadPermFiles" commands, the certificate serial number the user uses must also belong to the user "Admin".
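Assuming user-certs.xml follows the standard Hadoop configuration format, with the username as the property name and the serial numbers as the value, an entry might look like this (usernames and serial numbers are made up):

    <configuration>
      <property>
        <name>Admin</name>
        <value>1001,1002</value> <!-- certificate serial numbers allowed for Admin -->
      </property>
      <property>
        <name>alice</name>
        <value>2001</value>
      </property>
    </configuration>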
Name | Description
---|---
user-permissions.xml | This file defines the mappings from a user name to the comma-separated list of directories/files that the user is allowed to access. One mapping per user. Wildcard characters, such as "*" and "?", are not recognized. For example, to match the "/output" directory, one can use "/output" or "/output/", but not "/output/*". Note that any leading or trailing whitespace is stripped/ignored for the name field.
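Under the same assumption about the file format, a user-permissions.xml entry might look like:

    <configuration>
      <property>
        <name>alice</name>
        <value>/user/alice,/data/output</value> <!-- directories alice may access -->
      </property>
    </configuration>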
Under $HADOOP_PREFIX, do the following:

    $ ant clean tar
    $ cd src/contrib/hdfsproxy/
    $ ant clean tar

The hdfsproxy-*.tar.gz file will be generated under $HADOOP_PREFIX/build/contrib/hdfsproxy/. Use this tarball for the server start-up/shutdown process after the necessary configuration.
Starting up a Jetty-based HDFS Proxy server is similar to starting up an HDFS cluster: simply run the hdfsproxy shell command. The main configuration file is hdfsproxy-default.xml, which should be on the classpath. hdfsproxy-env.sh can be used to set up environment variables; in particular, JAVA_HOME should be set. As listed above, additional configuration files include user-certs.xml, user-permissions.xml and ssl-server.xml, which specify the allowed user certs, the allowed directories/files, and the SSL keystore information for the proxy, respectively. The location of these files can be specified in hdfsproxy-default.xml. The environment variable HDFSPROXY_CONF_DIR can be used to point to the directory where these configuration files are located. The configuration files (hadoop-site.xml, or core-site.xml and hdfs-site.xml) of the proxied HDFS cluster should also be available on the classpath.
Mirroring the scripts used in HDFS, a few shell scripts are provided to start and stop a group of proxy servers. The hosts on which to run hdfsproxy are specified in the hdfsproxy-hosts file, one host per line. All hdfsproxy servers are stateless and run independently of each other.

To start a group of proxy servers, do:

    $ start-hdfsproxy.sh

To stop a group of proxy servers, do:

    $ stop-hdfsproxy.sh

To trigger reloading of the user-certs.xml and user-permissions.xml files on all proxy servers listed in the hdfsproxy-hosts file, do:

    $ hdfsproxy -reloadPermFiles

To clear the UGI caches on all proxy servers, do:

    $ hdfsproxy -clearUgiCache

Use the HSFTP client to access the proxy:

    $ bin/hadoop fs -ls "hsftp://proxy.address:port/"
With a Tomcat-based installation, all HDFS Proxy features are supported:

- ProxyFilter installed
- LdapIpDirFilter installed

The tables below list the relevant configuration, covering both the case where ProxyFilter is installed and the case where LdapIpDirFilter is installed.

Source cluster related configuration:

Name | Description
---|---
fs.defaultFS | Source cluster NameNode address.
dfs.blocksize | The block size for file transfers.
io.file.buffer.size | The size of the buffer for use in sequence files. The size of this buffer should probably be a multiple of the hardware page size (4096 on Intel x86), and it determines how much data is buffered during read and write operations.
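For instance, these properties could be set as follows; the NameNode URI and the sizes are placeholders:

    <configuration>
      <property>
        <name>fs.defaultFS</name>
        <value>hdfs://namenode.example.com:8020</value>
      </property>
      <property>
        <name>dfs.blocksize</name>
        <value>134217728</value> <!-- 128 MB, illustrative -->
      </property>
      <property>
        <name>io.file.buffer.size</name>
        <value>131072</value> <!-- 128 KB, a multiple of the 4096-byte page size -->
      </property>
    </configuration>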
Proxy-related configuration in hdfsproxy-default.xml:

Name | Description
---|---
hdfsproxy.user.permissions.file.location | Location of the user permissions file.
hdfsproxy.user.certs.file.location | Location of the user certs file.
hdfsproxy.ugi.cache.ugi.lifetime | The lifetime (in minutes) of a cached UGI.
Name | Description
---|---
user-certs.xml | This file defines the mappings from a username to the comma-separated list of certificate serial numbers that the user is allowed to use. One mapping per user. Wildcard characters, such as "*" and "?", are not recognized. Any leading or trailing whitespace is stripped/ignored. In order for a user to be able to issue the "clearUgiCache" and "reloadPermFiles" commands, the certificate serial number the user uses must also belong to the user "Admin".
Name | Description
---|---
user-permissions.xml | This file defines the mappings from a user name to the comma-separated list of directories/files that the user is allowed to access. One mapping per user. Wildcard characters, such as "*" and "?", are not recognized. For example, to match the "/output" directory, one can use "/output" or "/output/", but not "/output/*". Note that any leading or trailing whitespace is stripped/ignored for the name field.
LDAP-related configuration in hdfsproxy-default.xml (used by LdapIpDirFilter):

Name | Description
---|---
hdfsproxy.ldap.initial.context.factory | LDAP context factory.
hdfsproxy.ldap.provider.url | LDAP server address.
hdfsproxy.ldap.role.base | LDAP role base.
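For example (the provider URL and role base are placeholders; the context factory shown is the standard JNDI LDAP factory):

    <configuration>
      <property>
        <name>hdfsproxy.ldap.initial.context.factory</name>
        <value>com.sun.jndi.ldap.LdapCtxFactory</value>
      </property>
      <property>
        <name>hdfsproxy.ldap.provider.url</name>
        <value>ldap://ldap.example.com:389</value>
      </property>
      <property>
        <name>hdfsproxy.ldap.role.base</name>
        <value>ou=proxyroles,dc=mycompany,dc=com</value> <!-- illustrative base DN -->
      </property>
    </configuration>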
Name | Description
---|---
web.xml (forwarding war) | This deployment descriptor file defines how servlets and filters are installed in the forwarding war (ROOT.war). The default filter installed is LdapIpDirFilter; you can change it to ProxyFilter by using org.apache.hadoop.hdfsproxy.ProxyFilter as your filter-class.
Name | Description
---|---
web.xml (client war) | This deployment descriptor file defines how servlets and filters are installed in the client war. The default filter installed is LdapIpDirFilter; you can change it to ProxyFilter by using org.apache.hadoop.hdfsproxy.ProxyFilter as your filter-class.
Name | Description
---|---
server.xml | You need to change Tomcat's server.xml file under $TOMCAT_HOME/conf as detailed in the Tomcat 6 SSL how-to. Set clientAuth="true" if you need to authenticate clients.
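The required change amounts to an HTTPS connector along these lines, following the Tomcat 6 SSL how-to; the port, keystore path, and password are placeholders:

    <Connector port="8443" protocol="HTTP/1.1" SSLEnabled="true"
               maxThreads="150" scheme="https" secure="true"
               clientAuth="true" sslProtocol="TLS"
               keystoreFile="/etc/hdfsproxy/server.keystore"
               keystorePass="changeme" />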
Name | Description
---|---
context.xml | You need to change Tomcat's context.xml file under $TOMCAT_HOME/conf by adding crossContext="true" to the Context element.
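The change amounts to something like the following; everything else already in context.xml stays as it is:

    <Context crossContext="true">
      <!-- existing contents of context.xml remain unchanged -->
      <WatchedResource>WEB-INF/web.xml</WatchedResource>
    </Context>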
Suppose hdfsproxy-default.xml has been properly configured and is under the ${user.home}/proxy-root-conf directory. Under $HADOOP_PREFIX, do the following:

    $ export HDFSPROXY_CONF_DIR=${user.home}/proxy-root-conf
    $ ant clean tar
    $ cd src/contrib/hdfsproxy/
    $ ant clean forward

The hdfsproxy-forward-*.war file will be generated under $HADOOP_PREFIX/build/contrib/hdfsproxy/. Copy this war file to Tomcat's webapps directory and rename it to ROOT.war (if a ROOT directory already exists, remove it first) for deployment.
Suppose hdfsproxy-default.xml has been properly configured and is under the ${user.home}/proxy-client-conf directory. Under $HADOOP_PREFIX, do the following:

    $ export HDFSPROXY_CONF_DIR=${user.home}/proxy-client-conf
    $ ant clean tar
    $ cd src/contrib/hdfsproxy/
    $ ant clean war

The hdfsproxy-*.war file will be generated under $HADOOP_PREFIX/build/contrib/hdfsproxy/. Copy this war file to Tomcat's webapps directory and rename it appropriately for deployment.
To proxy for multiple source clusters, build and deploy one client war per source cluster, each configured with its own hdfsproxy-default.xml.
Starting up and shutting down the Tomcat-based HDFS Proxy server is no more than starting up and shutting down the Tomcat server with Tomcat's bin/startup.sh and bin/shutdown.sh scripts. If you need to authenticate client certs, either set truststoreFile and truststorePass following the Tomcat 6 SSL how-to during configuration, or give the truststore location by doing the following before you start up Tomcat:

    $ export JAVA_OPTS="-Djavax.net.ssl.trustStore=${user.home}/truststore-location -Djavax.net.ssl.trustStorePassword=trustpass"
HTTPS client:

    $ curl -k "https://proxy.address:port/file/file-path"
    $ wget --no-check-certificate "https://proxy.address:port/file/file-path"

Hadoop client:

    $ bin/hadoop fs -ls "hsftp://proxy.address:port/"
Client-side SSL configuration (typically in ssl-client.xml):

Name | Description
---|---
ssl.client.do.not.authenticate.server | If true, trust all server certificates, like curl's -k option.
ssl.client.truststore.location | Location of the truststore.
ssl.client.truststore.password | Truststore password.
ssl.client.truststore.type | Truststore type.
ssl.client.keystore.location | Location of the keystore.
ssl.client.keystore.password | Keystore password.
ssl.client.keystore.type | Keystore type.
ssl.client.keystore.keypassword | Keystore key password.
ssl.expiration.warn.days | Server certificate expiration warning threshold in days; 0 means no warning should be issued.
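A minimal client-side sketch for a client that trusts all server certificates, assuming these properties live in ssl-client.xml on the client's classpath; paths and passwords are placeholders:

    <configuration>
      <property>
        <name>ssl.client.do.not.authenticate.server</name>
        <value>true</value> <!-- trust all server certificates, like curl -k -->
      </property>
      <property>
        <name>ssl.client.keystore.location</name>
        <value>/home/alice/client.keystore</value>
      </property>
      <property>
        <name>ssl.client.keystore.password</name>
        <value>changeme</value>
      </property>
      <property>
        <name>ssl.client.keystore.keypassword</name>
        <value>changeme</value>
      </property>
      <property>
        <name>ssl.client.keystore.type</name>
        <value>jks</value>
      </property>
      <property>
        <name>ssl.expiration.warn.days</name>
        <value>30</value> <!-- warn 30 days before the server certificate expires -->
      </property>
    </configuration>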