HDFS-15449. Optionally ignore port number in mount-table name when picking from initialized uri. Contributed by Uma Maheswara Rao G.

This commit is contained in:
Uma Maheswara Rao G 2020-07-06 18:50:03 -07:00 committed by GitHub
parent e820baa6e6
commit dc0626b5f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 97 additions and 15 deletions

View File

@ -104,4 +104,17 @@ public interface Constants {
"fs.viewfs.mount.links.as.symlinks";
boolean CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS_DEFAULT = true;
/**
* When initializing the viewfs, authority will be used as the mount table
* name to find the mount link configurations. To make the mount table name
* unique, we may want to ignore port if initialized uri authority contains
* port number. By default, the port number is kept as part of the mount
* table name in ViewFileSystem (the default value is false, so that
* existing deployments continue with the current behavior).
*/
String CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME =
"fs.viewfs.ignore.port.in.mount.table.name";
boolean CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT = false;
}

View File

@ -20,6 +20,8 @@
import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs;
import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_ENABLE_INNER_CACHE;
import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_ENABLE_INNER_CACHE_DEFAULT;
import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME;
import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT;
import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS;
import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS_DEFAULT;
import static org.apache.hadoop.fs.viewfs.Constants.PERMISSION_555;
@ -274,9 +276,15 @@ public void initialize(final URI theUri, final Configuration conf)
final InnerCache innerCache = new InnerCache(fsGetter);
// Now build client side view (i.e. client side mount table) from config.
final String authority = theUri.getAuthority();
String tableName = authority;
if (theUri.getPort() != -1 && config
.getBoolean(CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME,
CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT)) {
tableName = theUri.getHost();
}
try {
myUri = new URI(getScheme(), authority, "/", null, null);
fsState = new InodeTree<FileSystem>(conf, authority) {
fsState = new InodeTree<FileSystem>(conf, tableName) {
@Override
protected FileSystem getTargetFileSystem(final URI uri)
throws URISyntaxException, IOException {

View File

@ -31,6 +31,8 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME;
/******************************************************************************
* This class is extended from the ViewFileSystem for the overloaded scheme
* file system. Mount link configurations and in-memory mount table
@ -85,9 +87,14 @@
* Op3: Create file s3a://bucketA/salesDB/dbfile will go to
* s3a://bucketA/salesDB/dbfile
*
* Note: In ViewFileSystemOverloadScheme, by default the mount links will be
* Note:
* (1) In ViewFileSystemOverloadScheme, by default the mount links will be
* represented as non-symlinks. If you want to change this behavior, please see
* {@link ViewFileSystem#listStatus(Path)}
* (2) In ViewFileSystemOverloadScheme, only the initialized uri's hostname will
* be considered as the mount table name. When the passed uri has hostname:port,
* it will simply ignore the port number and only hostname will be considered as
* the mount table name.
*****************************************************************************/
@InterfaceAudience.LimitedPrivate({ "MapReduce", "HBase", "Hive" })
@InterfaceStability.Evolving
@ -115,6 +122,10 @@ public void initialize(URI theUri, Configuration conf) throws IOException {
conf.setBoolean(Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS,
conf.getBoolean(Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS,
false));
/* Default to true in ViewFSOverloadScheme unless explicitly configured. */
conf.setBoolean(CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME,
conf.getBoolean(Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME,
true));
if (null != mountTableConfigPath) {
MountTableConfigLoader loader = new HCFSMountTableConfigLoader();
loader.load(mountTableConfigPath, conf);

View File

@ -28,7 +28,11 @@ View File System Overload Scheme
### Details
The View File System Overload Scheme is an extension to the View File System. This will allow users to continue to use their existing fs.defaultFS configured scheme or any new scheme name instead of using scheme `viewfs`. Mount link configurations key, value formats are same as in [ViewFS Guide](./ViewFs.html). If a user wants to continue use the same fs.defaultFS and wants to have more mount points, then mount link configurations should have the current fs.defaultFS authority name as mount table name. Example if fs.defaultFS is `hdfs://mycluster`, then the mount link configuration key name should be like in the following format `fs.viewfs.mounttable.*mycluster*.link.<mountLinkPath>`. We will discuss more example configurations in following sections.
The View File System Overload Scheme is an extension to the View File System. This will allow users to continue to use their existing fs.defaultFS configured scheme or any new scheme name instead of using scheme `viewfs`.
Mount link configurations key, value formats are same as in [ViewFS Guide](./ViewFs.html).
If a user wants to continue to use the same fs.defaultFS and wants to have more mount points, then the mount link configurations should use the hostname of the uri with which ViewFileSystemOverloadScheme is initialized as the mount table name.
For example, if fs.defaultFS is `hdfs://mycluster`, then the mount link configuration key name should be in the following format: `fs.viewfs.mounttable.*mycluster*.link.<mountLinkPath>`.
Even if the initialized fs uri has hostname:port, it will simply ignore the port number and only consider the hostname as the mount table name. We will discuss more example configurations in the following sections.
Another important improvement with the ViewFileSystemOverloadScheme is that administrators need not copy the `mount-table.xml` configuration file to 1000s of client nodes. Instead, they can keep the mount-table configuration file in a Hadoop compatible file system. Keeping the configuration file in a central place makes administrators' lives easier, as they can update the mount table in a single place.
@ -171,7 +175,7 @@ For example, when the following configuration is used but a path like `viewfs:/f
</property>
```
#### Solution
### Solution
To avoid the above problem, the configuration `fs.viewfs.mounttable.default.name.key` has to be set to the name of the cluster, i.e, the following should be added to `core-site.xml`
```xml
<property>

View File

@ -17,6 +17,8 @@
*/
package org.apache.hadoop.fs.viewfs;
import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME;
import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT;
import static org.junit.Assume.assumeTrue;
import java.io.File;
@ -73,6 +75,8 @@ public void setUp() throws Exception {
FsConstants.FS_VIEWFS_OVERLOAD_SCHEME_TARGET_FS_IMPL_PATTERN,
"hdfs"),
DistributedFileSystem.class.getName());
conf.setBoolean(CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME,
CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT);
URI defaultFSURI =
URI.create(conf.get(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY));
ConfigUtil.addLink(conf, defaultFSURI.getAuthority(), "/user",

View File

@ -45,6 +45,8 @@
import org.junit.Before;
import org.junit.Test;
import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME;
import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT;
import static org.junit.Assert.*;
@ -79,6 +81,8 @@ public void startCluster() throws IOException {
conf.set(String.format(
FsConstants.FS_VIEWFS_OVERLOAD_SCHEME_TARGET_FS_IMPL_PATTERN,
HDFS_SCHEME), DistributedFileSystem.class.getName());
conf.setBoolean(CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME,
CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
cluster.waitClusterUp();
defaultFSURI =
@ -365,7 +369,7 @@ public void testInvalidOverloadSchemeTargetFS() throws Exception {
if (mountTableIfSet != null) {
conf.set(Constants.CONFIG_VIEWFS_MOUNTTABLE_PATH, mountTableIfSet);
}
addMountLinks(defaultFSURI.getAuthority(),
addMountLinks(defaultFSURI.getHost(),
new String[] {HDFS_USER_FOLDER, LOCAL_FOLDER,
Constants.CONFIG_VIEWFS_LINK_FALLBACK },
new String[] {hdfsTargetPath.toUri().toString(),
@ -593,6 +597,45 @@ public void testNflyRepair() throws Exception {
}
}
/**
 * Verifies that, with port-ignoring enabled, the mount table registered
 * under the bare host name is found regardless of whether the file system
 * is initialized from a uri with or without a port.
 *
 * The mount links (user folder, local folder and a fallback link) are added
 * under {@code defaultFSURI.getHost()} only. The file system is then obtained
 * twice, first from a host-only uri and then from {@code defaultFSURI}
 * (presumed to carry the cluster port; confirm against startCluster), and a
 * directory is created and deleted at the root each time.
 */
@Test(timeout = 30000)
public void testMountTableNameShouldIgnorePortFromURI() throws Exception {
  final Path userFolderOnHdfs = new Path(defaultFSURI + HDFS_USER_FOLDER);
  conf = new Configuration(getConf());

  // Register all links under the host name alone (no port).
  final String hdfsTarget = userFolderOnHdfs.toUri().toString();
  final String[] linkSources = {
      HDFS_USER_FOLDER, LOCAL_FOLDER, Constants.CONFIG_VIEWFS_LINK_FALLBACK};
  final String[] linkTargets = {
      hdfsTarget, localTargetDir.toURI().toString(), hdfsTarget};
  addMountLinks(defaultFSURI.getHost(), linkSources, linkTargets, conf);

  // Route the hdfs scheme through the overload-scheme file system.
  conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY,
      defaultFSURI.toString());
  final String schemeImplKey = String.format(FS_IMPL_PATTERN_KEY, HDFS_SCHEME);
  conf.set(schemeImplKey, ViewFileSystemOverloadScheme.class.getName());
  final String targetImplKey = String.format(
      FsConstants.FS_VIEWFS_OVERLOAD_SCHEME_TARGET_FS_IMPL_PATTERN,
      HDFS_SCHEME);
  conf.set(targetImplKey, DistributedFileSystem.class.getName());
  conf.setBoolean(CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME, true);

  final Path rootLevelDir = new Path("/test");
  final URI portlessUri = new URI("hdfs://" + defaultFSURI.getHost());

  // First initialize without a port, then with the cluster uri.
  final URI[] initUris = {portlessUri, defaultFSURI};
  for (URI initUri : initUris) {
    try (FileSystem fs = FileSystem.get(initUri, conf)) {
      fs.mkdirs(rootLevelDir);
      fs.delete(rootLevelDir, true);
    }
  }
}
private void writeString(final FileSystem nfly, final String testString,
final Path testFile) throws IOException {
try (FSDataOutputStream fsDos = nfly.create(testFile)) {

View File

@ -151,7 +151,7 @@ void addMountLinks(String mountTable, String[] sources, String[] targets,
@Test
public void testSaveNameSpace() throws Exception {
final Path hdfsTargetPath = new Path(defaultFSURI + HDFS_USER_FOLDER);
addMountLinks(defaultFSURI.getAuthority(),
addMountLinks(defaultFSURI.getHost(),
new String[] {HDFS_USER_FOLDER, LOCAL_FOLDER },
new String[] {hdfsTargetPath.toUri().toString(),
localTargetDir.toURI().toString() },
@ -177,7 +177,7 @@ public void testSaveNameSpace() throws Exception {
@Test
public void testSaveNamespaceWithoutSpecifyingFS() throws Exception {
final Path hdfsTargetPath = new Path(defaultFSURI + HDFS_USER_FOLDER);
addMountLinks(defaultFSURI.getAuthority(),
addMountLinks(defaultFSURI.getHost(),
new String[] {HDFS_USER_FOLDER, LOCAL_FOLDER },
new String[] {hdfsTargetPath.toUri().toString(),
localTargetDir.toURI().toString() },
@ -200,9 +200,8 @@ public void testSaveNamespaceWithoutSpecifyingFS() throws Exception {
public void testSafeModeWithWrongFS() throws Exception {
final Path hdfsTargetPath =
new Path("hdfs://nonExistent" + HDFS_USER_FOLDER);
addMountLinks(defaultFSURI.getAuthority(),
new String[] {HDFS_USER_FOLDER },
new String[] {hdfsTargetPath.toUri().toString(), }, conf);
addMountLinks(defaultFSURI.getHost(), new String[] {HDFS_USER_FOLDER},
new String[] {hdfsTargetPath.toUri().toString()}, conf);
final DFSAdmin dfsAdmin = new DFSAdmin(conf);
redirectStream();
int ret = ToolRunner.run(dfsAdmin, new String[] {"-safemode", "enter" });
@ -215,7 +214,7 @@ public void testSafeModeWithWrongFS() throws Exception {
*/
@Test
public void testSafeModeShouldFailOnLocalTargetFS() throws Exception {
addMountLinks(defaultFSURI.getAuthority(), new String[] {LOCAL_FOLDER },
addMountLinks(defaultFSURI.getHost(), new String[] {LOCAL_FOLDER },
new String[] {localTargetDir.toURI().toString() }, conf);
final DFSAdmin dfsAdmin = new DFSAdmin(conf);
// ViewFSOverloadScheme uri with localfs mount point
@ -247,8 +246,8 @@ public void testSafeModeShouldFailWithoutMountTables() throws Exception {
@Test
public void testAllowAndDisalllowSnapShot() throws Exception {
final Path hdfsTargetPath = new Path(defaultFSURI + HDFS_USER_FOLDER);
addMountLinks(defaultFSURI.getAuthority(),
new String[] {HDFS_USER_FOLDER, LOCAL_FOLDER },
addMountLinks(defaultFSURI.getHost(),
new String[] {HDFS_USER_FOLDER, LOCAL_FOLDER},
new String[] {hdfsTargetPath.toUri().toString(),
localTargetDir.toURI().toString() },
conf);
@ -270,7 +269,7 @@ public void testAllowAndDisalllowSnapShot() throws Exception {
@Test
public void testSetBalancerBandwidth() throws Exception {
final Path hdfsTargetPath = new Path(defaultFSURI + HDFS_USER_FOLDER);
addMountLinks(defaultFSURI.getAuthority(),
addMountLinks(defaultFSURI.getHost(),
new String[] {HDFS_USER_FOLDER, LOCAL_FOLDER },
new String[] {hdfsTargetPath.toUri().toString(),
localTargetDir.toURI().toString() },

View File

@ -142,7 +142,7 @@ public void testDFWithViewFsOverloadScheme() throws Exception {
List<String> mounts = Lists.newArrayList();
mounts.add(HDFS_USER_FOLDER);
mounts.add(LOCAL_FOLDER);
addMountLinks(defaultFSURI.getAuthority(),
addMountLinks(defaultFSURI.getHost(),
mounts.toArray(new String[mounts.size()]),
new String[] {hdfsTargetPath.toUri().toString(),
localTargetDir.toURI().toString() },