HADOOP-14598. Blacklist Http/HttpsFileSystem in FsUrlStreamHandlerFactory. Contributed by Steve Loughran.

This commit is contained in:
Haohui Mai 2017-08-08 16:27:23 -07:00
parent f4e1aa0508
commit 1db4788b7d
3 changed files with 72 additions and 12 deletions

View File

@ -23,6 +23,10 @@
import java.net.URL;
import java.net.URLConnection;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@ -33,6 +37,8 @@
@InterfaceAudience.Private
@InterfaceStability.Unstable
class FsUrlConnection extends URLConnection {
private static final Logger LOG =
LoggerFactory.getLogger(FsUrlConnection.class);
private Configuration conf;
@ -40,12 +46,16 @@ class FsUrlConnection extends URLConnection {
/**
 * Creates a connection for the given URL, backed by a Hadoop configuration.
 * Both arguments are checked for null with Preconditions.checkArgument;
 * note that these checks run after the superclass constructor has already
 * been handed the url.
 *
 * @param conf configuration, later passed to FileSystem.get() in connect()
 * @param url the URL this connection refers to
 */
FsUrlConnection(Configuration conf, URL url) {
super(url);
Preconditions.checkArgument(conf != null, "null conf argument");
Preconditions.checkArgument(url != null, "null url argument");
this.conf = conf;
}
@Override
public void connect() throws IOException {
Preconditions.checkState(is == null, "Already connected");
try {
LOG.debug("Connecting to {}", url);
FileSystem fs = FileSystem.get(url.toURI(), conf);
is = fs.open(new Path(url.getPath()));
} catch (URISyntaxException e) {

View File

@ -22,6 +22,9 @@
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@ -41,6 +44,18 @@
public class FsUrlStreamHandlerFactory implements
URLStreamHandlerFactory {
private static final Logger LOG =
LoggerFactory.getLogger(FsUrlStreamHandlerFactory.class);
/**
* These are the protocols which MUST NOT be exported, as doing so
* would conflict with the standard URL handlers registered by
* the JVM. Many things will break.
*/
public static final String[] UNEXPORTED_PROTOCOLS = {
"http", "https"
};
// The configuration holds supported FS implementation class names.
private Configuration conf;
@ -64,14 +79,20 @@ public FsUrlStreamHandlerFactory(Configuration conf) {
throw new RuntimeException(io);
}
this.handler = new FsUrlStreamHandler(this.conf);
for (String protocol : UNEXPORTED_PROTOCOLS) {
protocols.put(protocol, false);
}
}
@Override
public java.net.URLStreamHandler createURLStreamHandler(String protocol) {
LOG.debug("Creating handler for protocol {}", protocol);
if (!protocols.containsKey(protocol)) {
boolean known = true;
try {
FileSystem.getFileSystemClass(protocol, conf);
Class<? extends FileSystem> impl
= FileSystem.getFileSystemClass(protocol, conf);
LOG.debug("Found implementation of {}: {}", protocol, impl);
}
catch (IOException ex) {
known = false;
@ -79,9 +100,12 @@ public java.net.URLStreamHandler createURLStreamHandler(String protocol) {
protocols.put(protocol, known);
}
if (protocols.get(protocol)) {
LOG.debug("Using handler for protocol {}", protocol);
return handler;
} else {
// FileSystem does not know the protocol, let the VM handle this
LOG.debug("Unknown protocol {}, delegating to default implementation",
protocol);
return null;
}
}

View File

@ -19,6 +19,7 @@
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import java.io.File;
import java.io.IOException;
@ -32,6 +33,8 @@
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.test.PathUtils;
import org.junit.BeforeClass;
import org.junit.Test;
/**
@ -39,8 +42,22 @@
*/
public class TestUrlStreamHandler {
private static final File TEST_ROOT_DIR = PathUtils.getTestDir(TestUrlStreamHandler.class);
private static final File TEST_ROOT_DIR =
PathUtils.getTestDir(TestUrlStreamHandler.class);
private static final FsUrlStreamHandlerFactory HANDLER_FACTORY
= new FsUrlStreamHandlerFactory();
/**
 * Registers HANDLER_FACTORY as the URL stream handler factory once,
 * before the tests of this class run.
 */
@BeforeClass
public static void setupHandler() {
// Set up our own factory.
// URL.setURLStreamHandlerFactory can be called at most once per JVM,
// so the factory registered here is used by all test cases
// in TestUrlStreamHandler.
URL.setURLStreamHandlerFactory(HANDLER_FACTORY);
}
/**
* Test opening and reading from an InputStream through a hdfs:// URL.
* <p>
@ -55,15 +72,6 @@ public void testDfsUrls() throws IOException {
Configuration conf = new HdfsConfiguration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
FileSystem fs = cluster.getFileSystem();
// Setup our own factory
// setURLSteramHandlerFactor is can be set at most once in the JVM
// the new URLStreamHandler is valid for all tests cases
// in TestStreamHandler
FsUrlStreamHandlerFactory factory =
new org.apache.hadoop.fs.FsUrlStreamHandlerFactory();
java.net.URL.setURLStreamHandlerFactory(factory);
Path filePath = new Path("/thefile");
try {
@ -156,4 +164,22 @@ public void testFileUrls() throws IOException, URISyntaxException {
}
/**
 * The factory must not supply a handler for "http": a null result
 * leaves the protocol to the JVM's default handler.
 * The assertion message describes the failure case.
 */
@Test
public void testHttpDefaultHandler() throws Throwable {
assertNull("Handler for HTTP is the Hadoop one",
HANDLER_FACTORY.createURLStreamHandler("http"));
}
/**
 * The factory must not supply a handler for "https": a null result
 * leaves the protocol to the JVM's default handler.
 * The assertion message describes the failure case.
 */
@Test
public void testHttpsDefaultHandler() throws Throwable {
assertNull("Handler for HTTPS is the Hadoop one",
HANDLER_FACTORY.createURLStreamHandler("https"));
}
/**
 * A protocol the factory does not recognise must yield a null handler.
 * NOTE(review): assumes no FileSystem implementation is registered for
 * the "gopher" scheme in the test configuration -- confirm.
 */
@Test
public void testUnknownProtocol() throws Throwable {
assertNull("Unknown protocols are not handled",
HANDLER_FACTORY.createURLStreamHandler("gopher"));
}
}