HADOOP-14598. Blacklist Http/HttpsFileSystem in FsUrlStreamHandlerFactory. Contributed by Steve Loughran.

This commit is contained in:
Haohui Mai 2017-08-08 16:27:23 -07:00
parent f4e1aa0508
commit 1db4788b7d
3 changed files with 72 additions and 12 deletions

View File

@ -23,6 +23,10 @@
import java.net.URL; import java.net.URL;
import java.net.URLConnection; import java.net.URLConnection;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
@ -33,6 +37,8 @@
@InterfaceAudience.Private @InterfaceAudience.Private
@InterfaceStability.Unstable @InterfaceStability.Unstable
class FsUrlConnection extends URLConnection { class FsUrlConnection extends URLConnection {
private static final Logger LOG =
LoggerFactory.getLogger(FsUrlConnection.class);
private Configuration conf; private Configuration conf;
@ -40,12 +46,16 @@ class FsUrlConnection extends URLConnection {
FsUrlConnection(Configuration conf, URL url) { FsUrlConnection(Configuration conf, URL url) {
super(url); super(url);
Preconditions.checkArgument(conf != null, "null conf argument");
Preconditions.checkArgument(url != null, "null url argument");
this.conf = conf; this.conf = conf;
} }
@Override @Override
public void connect() throws IOException { public void connect() throws IOException {
Preconditions.checkState(is == null, "Already connected");
try { try {
LOG.debug("Connecting to {}", url);
FileSystem fs = FileSystem.get(url.toURI(), conf); FileSystem fs = FileSystem.get(url.toURI(), conf);
is = fs.open(new Path(url.getPath())); is = fs.open(new Path(url.getPath()));
} catch (URISyntaxException e) { } catch (URISyntaxException e) {

View File

@ -22,6 +22,9 @@
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
@ -41,6 +44,18 @@
public class FsUrlStreamHandlerFactory implements public class FsUrlStreamHandlerFactory implements
URLStreamHandlerFactory { URLStreamHandlerFactory {
private static final Logger LOG =
LoggerFactory.getLogger(FsUrlStreamHandlerFactory.class);
/**
* These are the protocols which MUST NOT be exported, as doing so
* would conflict with the standard URL handlers registered by
* the JVM. Many things will break.
*/
public static final String[] UNEXPORTED_PROTOCOLS = {
"http", "https"
};
// The configuration holds supported FS implementation class names. // The configuration holds supported FS implementation class names.
private Configuration conf; private Configuration conf;
@ -64,14 +79,20 @@ public FsUrlStreamHandlerFactory(Configuration conf) {
throw new RuntimeException(io); throw new RuntimeException(io);
} }
this.handler = new FsUrlStreamHandler(this.conf); this.handler = new FsUrlStreamHandler(this.conf);
for (String protocol : UNEXPORTED_PROTOCOLS) {
protocols.put(protocol, false);
}
} }
@Override @Override
public java.net.URLStreamHandler createURLStreamHandler(String protocol) { public java.net.URLStreamHandler createURLStreamHandler(String protocol) {
LOG.debug("Creating handler for protocol {}", protocol);
if (!protocols.containsKey(protocol)) { if (!protocols.containsKey(protocol)) {
boolean known = true; boolean known = true;
try { try {
FileSystem.getFileSystemClass(protocol, conf); Class<? extends FileSystem> impl
= FileSystem.getFileSystemClass(protocol, conf);
LOG.debug("Found implementation of {}: {}", protocol, impl);
} }
catch (IOException ex) { catch (IOException ex) {
known = false; known = false;
@ -79,9 +100,12 @@ public java.net.URLStreamHandler createURLStreamHandler(String protocol) {
protocols.put(protocol, known); protocols.put(protocol, known);
} }
if (protocols.get(protocol)) { if (protocols.get(protocol)) {
LOG.debug("Using handler for protocol {}", protocol);
return handler; return handler;
} else { } else {
// FileSystem does not know the protocol, let the VM handle this // FileSystem does not know the protocol, let the VM handle this
LOG.debug("Unknown protocol {}, delegating to default implementation",
protocol);
return null; return null;
} }
} }

View File

@ -19,6 +19,7 @@
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
@ -32,6 +33,8 @@
import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.test.PathUtils; import org.apache.hadoop.test.PathUtils;
import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
/** /**
@ -39,7 +42,21 @@
*/ */
public class TestUrlStreamHandler { public class TestUrlStreamHandler {
private static final File TEST_ROOT_DIR = PathUtils.getTestDir(TestUrlStreamHandler.class); private static final File TEST_ROOT_DIR =
PathUtils.getTestDir(TestUrlStreamHandler.class);
private static final FsUrlStreamHandlerFactory HANDLER_FACTORY
= new FsUrlStreamHandlerFactory();
@BeforeClass
public static void setupHandler() {
// Setup our own factory
// setURLStreamHandlerFactory can be set at most once in the JVM;
// the new URLStreamHandler is valid for all test cases
// in TestStreamHandler
URL.setURLStreamHandlerFactory(HANDLER_FACTORY);
}
/** /**
* Test opening and reading from an InputStream through a hdfs:// URL. * Test opening and reading from an InputStream through a hdfs:// URL.
@ -55,15 +72,6 @@ public void testDfsUrls() throws IOException {
Configuration conf = new HdfsConfiguration(); Configuration conf = new HdfsConfiguration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
FileSystem fs = cluster.getFileSystem(); FileSystem fs = cluster.getFileSystem();
// Setup our own factory
// setURLSteramHandlerFactor is can be set at most once in the JVM
// the new URLStreamHandler is valid for all tests cases
// in TestStreamHandler
FsUrlStreamHandlerFactory factory =
new org.apache.hadoop.fs.FsUrlStreamHandlerFactory();
java.net.URL.setURLStreamHandlerFactory(factory);
Path filePath = new Path("/thefile"); Path filePath = new Path("/thefile");
try { try {
@ -156,4 +164,22 @@ public void testFileUrls() throws IOException, URISyntaxException {
} }
@Test
public void testHttpDefaultHandler() throws Throwable {
assertNull("Handler for HTTP is the Hadoop one",
HANDLER_FACTORY.createURLStreamHandler("http"));
}
@Test
public void testHttpsDefaultHandler() throws Throwable {
assertNull("Handler for HTTPS is the Hadoop one",
HANDLER_FACTORY.createURLStreamHandler("https"));
}
@Test
public void testUnknownProtocol() throws Throwable {
assertNull("Unknown protocols are not handled",
HANDLER_FACTORY.createURLStreamHandler("gopher"));
}
} }