diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java index 21120df6ed..7d31103d97 100644 --- a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java @@ -87,6 +87,10 @@ public final class AdlConfKeys { "adl.feature.support.acl.bit"; static final boolean ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION_DEFAULT = true; + static final String ADL_ENABLEUPN_FOR_OWNERGROUP_KEY = + "adl.feature.ownerandgroup.enableupn"; + static final boolean ADL_ENABLEUPN_FOR_OWNERGROUP_DEFAULT = false; + private AdlConfKeys() { } } diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java index fb0feda886..e0e273e6f2 100644 --- a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java @@ -32,6 +32,7 @@ import com.microsoft.azure.datalake.store.DirectoryEntry; import com.microsoft.azure.datalake.store.DirectoryEntryType; import com.microsoft.azure.datalake.store.IfExists; import com.microsoft.azure.datalake.store.LatencyTracker; +import com.microsoft.azure.datalake.store.UserGroupRepresentation; import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider; import com.microsoft.azure.datalake.store.oauth2.ClientCredsTokenProvider; import com.microsoft.azure.datalake.store.oauth2.RefreshTokenBasedTokenProvider; @@ -80,6 +81,8 @@ public class AdlFileSystem extends FileSystem { private ADLStoreClient adlClient; private Path workingDirectory; private boolean aclBitStatus; + private UserGroupRepresentation oidOrUpn; + // retained for tests private AccessTokenProvider tokenProvider; @@ -181,6 +184,11 @@ public class AdlFileSystem extends FileSystem { if (!trackLatency) { LatencyTracker.disable(); } + + boolean enableUPN = conf.getBoolean(ADL_ENABLEUPN_FOR_OWNERGROUP_KEY, + ADL_ENABLEUPN_FOR_OWNERGROUP_DEFAULT); + oidOrUpn = enableUPN ? UserGroupRepresentation.UPN : + UserGroupRepresentation.OID; } /** @@ -439,7 +447,8 @@ public class AdlFileSystem extends FileSystem { @Override public FileStatus getFileStatus(final Path f) throws IOException { statistics.incrementReadOps(1); - DirectoryEntry entry = adlClient.getDirectoryEntry(toRelativeFilePath(f)); + DirectoryEntry entry = + adlClient.getDirectoryEntry(toRelativeFilePath(f), oidOrUpn); return toFileStatus(entry, f); } @@ -456,7 +465,7 @@ public class AdlFileSystem extends FileSystem { public FileStatus[] listStatus(final Path f) throws IOException { statistics.incrementReadOps(1); List entries = - adlClient.enumerateDirectory(toRelativeFilePath(f)); + adlClient.enumerateDirectory(toRelativeFilePath(f), oidOrUpn); return toFileStatuses(entries, f); } @@ -749,8 +758,8 @@ public class AdlFileSystem extends FileSystem { @Override public AclStatus getAclStatus(final Path path) throws IOException { statistics.incrementReadOps(1); - com.microsoft.azure.datalake.store.acl.AclStatus adlStatus = adlClient - .getAclStatus(toRelativeFilePath(path)); + com.microsoft.azure.datalake.store.acl.AclStatus adlStatus = + adlClient.getAclStatus(toRelativeFilePath(path), oidOrUpn); AclStatus.Builder aclStatusBuilder = new AclStatus.Builder(); aclStatusBuilder.owner(adlStatus.owner); aclStatusBuilder.group(adlStatus.group); @@ -963,4 +972,10 @@ public class AdlFileSystem extends FileSystem { } return new String(passchars); } + + @VisibleForTesting + public void setUserGroupRepresentationAsUPN(boolean enableUPN) { + oidOrUpn = enableUPN ? UserGroupRepresentation.UPN : + UserGroupRepresentation.OID; + } } diff --git a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md index 5037db61f6..6d9e1735e4 100644 --- a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md +++ b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md @@ -26,6 +26,7 @@ * [Protecting the Credentials with Credential Providers](#Credential_Provider) * [Enabling ADL Filesystem](#Enabling_ADL) * [Accessing adl URLs](#Accessing_adl_URLs) + * [User/Group Representation](#OIDtoUPNConfiguration) * [Testing the hadoop-azure Module](#Testing_the_hadoop-azure_Module) ## Introduction @@ -42,6 +43,8 @@ The jar file is named azure-datalake-store.jar. * Can act as a source of data in a MapReduce job, or a sink. * Tested on both Linux and Windows. * Tested for scale. +* API setOwner/setAcl/removeAclEntries/modifyAclEntries accepts UPN or OID + (Object ID) as user and group name. ## Limitations Partial or no support for the following operations : @@ -221,6 +224,29 @@ commands demonstrate access to a storage account named `youraccount`. > hadoop fs -cat adl://yourcontainer.azuredatalakestore.net/testDir/testFile test file content + +### User/Group Representation +The hadoop-azure-datalake module provides support for configuring how +User/Group information is represented during +getFileStatus/listStatus/getAclStatus. + +Add the following properties to your core-site.xml + + + adl.feature.ownerandgroup.enableupn + true + + When true : User and Group in FileStatus/AclStatus response is + represented as user friendly name as per Azure AD profile. + + When false (default) : User and Group in FileStatus/AclStatus + response is represented by the unique identifier from Azure AD + profile (Object ID as GUID). + + For performance optimization, Recommended default value. + + + ## Testing the azure-datalake-store Module The hadoop-azure module includes a full suite of unit tests. Most of the tests will run without additional configuration by running mvn test. This includes tests against mocked storage, which is an in-memory emulation of Azure Data Lake Storage. diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java index e3025b2aef..4cabaa3d08 100644 --- a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java @@ -26,6 +26,10 @@ import static org.apache.hadoop.fs.adl.AdlConfKeys .ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER; import static org.apache.hadoop.fs.adl.AdlConfKeys .ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_ENABLEUPN_FOR_OWNERGROUP_DEFAULT; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_ENABLEUPN_FOR_OWNERGROUP_KEY; import static org.apache.hadoop.fs.adl.AdlConfKeys .ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT; import static org.apache.hadoop.fs.adl.AdlConfKeys @@ -99,5 +103,10 @@ public class TestValidateConfiguration { Assert.assertEquals(false, ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT); Assert.assertEquals(4 * 1024 * 1024, DEFAULT_READ_AHEAD_BUFFER_SIZE); Assert.assertEquals(4 * 1024 * 1024, DEFAULT_WRITE_AHEAD_BUFFER_SIZE); + + Assert.assertEquals("adl.feature.ownerandgroup.enableupn", + ADL_ENABLEUPN_FOR_OWNERGROUP_KEY); + Assert.assertEquals(false, + ADL_ENABLEUPN_FOR_OWNERGROUP_DEFAULT); } } diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestMetadata.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestMetadata.java index 3b9e7da86d..dbcaa390d6 100644 --- a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestMetadata.java +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestMetadata.java @@ -22,6 +22,7 @@ import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.adl.AdlFileSystem; import org.junit.After; import org.junit.Assert; import org.junit.Assume; @@ -32,6 +33,8 @@ import java.io.IOException; import java.io.OutputStream; import java.util.UUID; +import static org.junit.Assert.fail; + /** * This class is responsible for testing ContentSummary, ListStatus on * file/folder. @@ -107,5 +110,35 @@ public class TestMetadata { .assertEquals(path.makeQualified(fs.getUri(), fs.getWorkingDirectory()), statuses[0].getPath()); } + + @Test + public void testUserRepresentationConfiguration() throws IOException { + // Validating actual user/group OID or friendly name is outside scope of + // this test. + Path path = new Path(parent, "a.txt"); + AdlFileSystem fs = (AdlFileSystem) adlStore; + + // When set to true, User/Group information should be user friendly name. + // That is non GUID value. + fs.setUserGroupRepresentationAsUPN(false); + fs.createNewFile(path); + Assert.assertTrue(fs.isFile(path)); + FileStatus fileStatus = fs.getFileStatus(path); + UUID.fromString(fileStatus.getGroup()); + UUID.fromString(fileStatus.getOwner()); + + // When set to false, User/Group information should be AAD represented + // unique OID. That is GUID value. + // Majority of the cases, user friendly name would not be GUID value. + fs.setUserGroupRepresentationAsUPN(true); + fileStatus = fs.getFileStatus(path); + try { + UUID.fromString(fileStatus.getGroup()); + UUID.fromString(fileStatus.getOwner()); + fail("Expected user friendly name to be non guid value."); + } catch (IllegalArgumentException e) { + // expected to fail since + } + } }