From 1db4788b7d22e57f91520e4a6971774ef84ffab9 Mon Sep 17 00:00:00 2001
From: Haohui Mai
Date: Tue, 8 Aug 2017 16:27:23 -0700
Subject: [PATCH] HADOOP-14598. Blacklist Http/HttpsFileSystem in
 FsUrlStreamHandlerFactory. Contributed by Steve Loughran.
---
 .../org/apache/hadoop/fs/FsUrlConnection.java | 10 ++++
 .../hadoop/fs/FsUrlStreamHandlerFactory.java  | 26 +++++++++-
 .../hadoop/fs/TestUrlStreamHandler.java       | 48 ++++++++++++++-----
 3 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlConnection.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlConnection.java
index 90e75b0ccb..03c7aeddd9 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlConnection.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlConnection.java
@@ -23,6 +23,10 @@
 import java.net.URL;
 import java.net.URLConnection;
 
+import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -33,6 +37,8 @@
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
 class FsUrlConnection extends URLConnection {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(FsUrlConnection.class);
 
   private Configuration conf;
 
@@ -40,12 +46,16 @@ class FsUrlConnection extends URLConnection {
 
   FsUrlConnection(Configuration conf, URL url) {
     super(url);
+    Preconditions.checkArgument(conf != null, "null conf argument");
+    Preconditions.checkArgument(url != null, "null url argument");
     this.conf = conf;
   }
 
   @Override
   public void connect() throws IOException {
+    Preconditions.checkState(is == null, "Already connected");
     try {
+      LOG.debug("Connecting to {}", url);
       FileSystem fs = FileSystem.get(url.toURI(), conf);
       is = fs.open(new Path(url.getPath()));
     } catch (URISyntaxException e) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java
index 91a527dce7..751b95516c 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsUrlStreamHandlerFactory.java
@@ -22,6 +22,9 @@
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -41,6 +44,18 @@
 public class FsUrlStreamHandlerFactory implements
     URLStreamHandlerFactory {
 
+  private static final Logger LOG =
+      LoggerFactory.getLogger(FsUrlStreamHandlerFactory.class);
+
+  /**
+   * These are the protocols which MUST NOT be exported, as doing so
+   * would conflict with the standard URL handlers registered by
+   * the JVM. Many things will break.
+   */
+  public static final String[] UNEXPORTED_PROTOCOLS = {
+      "http", "https"
+  };
+
   // The configuration holds supported FS implementation class names.
   private Configuration conf;
@@ -64,14 +79,20 @@ public FsUrlStreamHandlerFactory(Configuration conf) {
       throw new RuntimeException(io);
     }
     this.handler = new FsUrlStreamHandler(this.conf);
+    for (String protocol : UNEXPORTED_PROTOCOLS) {
+      protocols.put(protocol, false);
+    }
   }
 
   @Override
   public java.net.URLStreamHandler createURLStreamHandler(String protocol) {
+    LOG.debug("Creating handler for protocol {}", protocol);
     if (!protocols.containsKey(protocol)) {
       boolean known = true;
       try {
-        FileSystem.getFileSystemClass(protocol, conf);
+        Class<? extends FileSystem> impl
+            = FileSystem.getFileSystemClass(protocol, conf);
+        LOG.debug("Found implementation of {}: {}", protocol, impl);
       } catch (IOException ex) {
         known = false;
@@ -79,9 +100,12 @@ public java.net.URLStreamHandler createURLStreamHandler(String protocol) {
       protocols.put(protocol, known);
     }
     if (protocols.get(protocol)) {
+      LOG.debug("Using handler for protocol {}", protocol);
       return handler;
     } else {
       // FileSystem does not know the protocol, let the VM handle this
+      LOG.debug("Unknown protocol {}, delegating to default implementation",
+          protocol);
       return null;
     }
   }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUrlStreamHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUrlStreamHandler.java
index 6fc97a2948..5a04f67846 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUrlStreamHandler.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestUrlStreamHandler.java
@@ -19,6 +19,7 @@
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 
 import java.io.File;
 import java.io.IOException;
@@ -32,6 +33,8 @@
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.test.PathUtils;
+
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 /**
@@ -39,8 +42,22 @@
  */
 public class TestUrlStreamHandler {
 
-  private static final File TEST_ROOT_DIR = PathUtils.getTestDir(TestUrlStreamHandler.class);
-
+  private static final File TEST_ROOT_DIR =
+      PathUtils.getTestDir(TestUrlStreamHandler.class);
+
+  private static final FsUrlStreamHandlerFactory HANDLER_FACTORY
+      = new FsUrlStreamHandlerFactory();
+
+  @BeforeClass
+  public static void setupHandler() {
+
+    // Setup our own factory.
+    // setURLStreamHandlerFactory can be called at most once per JVM;
+    // the new URLStreamHandler is then valid for all test cases
+    // in TestUrlStreamHandler.
+    URL.setURLStreamHandlerFactory(HANDLER_FACTORY);
+  }
+
   /**
    * Test opening and reading from an InputStream through a hdfs:// URL.
    * <p>

@@ -55,15 +72,6 @@ public void testDfsUrls() throws IOException { Configuration conf = new HdfsConfiguration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); FileSystem fs = cluster.getFileSystem(); - - // Setup our own factory - // setURLSteramHandlerFactor is can be set at most once in the JVM - // the new URLStreamHandler is valid for all tests cases - // in TestStreamHandler - FsUrlStreamHandlerFactory factory = - new org.apache.hadoop.fs.FsUrlStreamHandlerFactory(); - java.net.URL.setURLStreamHandlerFactory(factory); - Path filePath = new Path("/thefile"); try { @@ -156,4 +164,22 @@ public void testFileUrls() throws IOException, URISyntaxException { } + @Test + public void testHttpDefaultHandler() throws Throwable { + assertNull("Handler for HTTP is the Hadoop one", + HANDLER_FACTORY.createURLStreamHandler("http")); + } + + @Test + public void testHttpsDefaultHandler() throws Throwable { + assertNull("Handler for HTTPS is the Hadoop one", + HANDLER_FACTORY.createURLStreamHandler("https")); + } + + @Test + public void testUnknownProtocol() throws Throwable { + assertNull("Unknown protocols are not handled", + HANDLER_FACTORY.createURLStreamHandler("gopher")); + } + }
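
For context, the sketch below shows how an application is expected to use FsUrlStreamHandlerFactory after this change. It is not part of the patch: the NameNode address (hdfs://namenode:8020) and the file path are hypothetical, and a Hadoop client classpath plus a running HDFS cluster are assumed. Registering the factory makes hdfs:// URLs readable through java.net.URL, while the blacklisted http and https schemes fall through to the JVM's built-in handlers because createURLStreamHandler() returns null for them.

import java.io.InputStream;
import java.net.URL;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;

public class FsUrlHandlerSketch {
  public static void main(String[] args) throws Exception {
    // A JVM accepts a URLStreamHandlerFactory at most once per process,
    // which is why the patch moves registration into @BeforeClass.
    URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());

    // hdfs:// URLs now resolve through Hadoop's FileSystem API
    // (hypothetical NameNode address and path).
    try (InputStream in =
        new URL("hdfs://namenode:8020/thefile").openStream()) {
      System.out.println("first byte: " + in.read());
    }

    // http and https were pre-seeded as "false" in the protocols map, so
    // the factory returns null for them and the JVM keeps its standard
    // handlers; code relying on HttpURLConnection is unaffected.
    System.out.println(
        new URL("http://example.org/").openConnection().getClass());
  }
}

Any other scheme (for example ftp or webhdfs) is probed lazily through FileSystem.getFileSystemClass and the result is cached in the protocols map, as the createURLStreamHandler hunk above shows.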