From e015b563197a475e354bf84fd27e7bbcc67e00a4 Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Fri, 20 Jan 2017 14:34:02 +0800 Subject: [PATCH] HADOOP-13956. Read ADLS credentials from Credential Provider. (John Zhuge via lei) --- .../site/markdown/CredentialProviderAPI.md | 1 + .../apache/hadoop/fs/adl/AdlFileSystem.java | 37 ++++- .../src/site/markdown/index.md | 44 +++++ .../fs/adl/TestAzureADTokenProvider.java | 152 ++++++++++++++++++ 4 files changed, 226 insertions(+), 8 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/CredentialProviderAPI.md b/hadoop-common-project/hadoop-common/src/site/markdown/CredentialProviderAPI.md index a40bf2b8a2..30dfdd8f5e 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/CredentialProviderAPI.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/CredentialProviderAPI.md @@ -102,6 +102,7 @@ In summary, first, provision the credentials into a provider then configure the |YARN |WebAppUtils uptakes the use of the credential provider API through the new method on Configuration called getPassword. This provides an alternative to storing the passwords in clear text within the ssl-server.xml file while maintaining backward compatibility.|TODO| |AWS
S3/S3A |Uses Configuration.getPassword to get the S3 credentials. They may be resolved through the credential provider API or from the config for backward compatibility.|[AWS S3/S3A Usage](../../hadoop-aws/tools/hadoop-aws/index.html)| |Azure
WASB |Uses Configuration.getPassword to get the WASB credentials. They may be resolved through the credential provider API or from the config for backward compatibility.|[Azure WASB Usage](../../hadoop-azure/index.html)| +|Azure
ADLS |Uses Configuration.getPassword to get the ADLS credentials. They may be resolved through the credential provider API or from the config for backward compatibility.|[Azure ADLS Usage](../../hadoop-azure-datalake/index.html)| |Apache
Accumulo|The trace.password property is used by the Tracer to authenticate with Accumulo and persist the traces in the trace table. The credential provider API is used to acquire the trace.password from a provider or from configuration for backward compatibility.|TODO| |Apache
Slider |A capability has been added to Slider to prompt the user for needed passwords and store them using CredentialProvider so they can be retrieved by an app later.|TODO| |Apache
Hive |Protection of the metastore password, SSL related passwords and JDO string password has been added through the use of the Credential Provider API|TODO| diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java index bd43c52164..3d41025245 100644 --- a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java @@ -58,10 +58,12 @@ import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.ProviderUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.VersionInfo; + import static org.apache.hadoop.fs.adl.AdlConfKeys.*; /** @@ -224,8 +226,10 @@ protected synchronized AzureADTokenProvider getCustomAccessTokenProvider( return azureTokenProvider; } - private AccessTokenProvider getAccessTokenProvider(Configuration conf) + private AccessTokenProvider getAccessTokenProvider(Configuration config) throws IOException { + Configuration conf = ProviderUtils.excludeIncompatibleCredentialProviders( + config, AdlFileSystem.class); TokenProviderType type = conf.getEnum( AdlConfKeys.AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, TokenProviderType.Custom); @@ -248,17 +252,17 @@ private AccessTokenProvider getAccessTokenProvider(Configuration conf) } private AccessTokenProvider getConfCredentialBasedTokenProvider( - Configuration conf) { - String clientId = getNonEmptyVal(conf, AZURE_AD_CLIENT_ID_KEY); - String refreshUrl = getNonEmptyVal(conf, AZURE_AD_REFRESH_URL_KEY); - String clientSecret = getNonEmptyVal(conf, AZURE_AD_CLIENT_SECRET_KEY); + Configuration conf) throws IOException { + String clientId = getPasswordString(conf, AZURE_AD_CLIENT_ID_KEY); + String refreshUrl = getPasswordString(conf, AZURE_AD_REFRESH_URL_KEY); + String clientSecret = getPasswordString(conf, AZURE_AD_CLIENT_SECRET_KEY); return new ClientCredsTokenProvider(refreshUrl, clientId, clientSecret); } private AccessTokenProvider getConfRefreshTokenBasedTokenProvider( - Configuration conf) { - String clientId = getNonEmptyVal(conf, AZURE_AD_CLIENT_ID_KEY); - String refreshToken = getNonEmptyVal(conf, AZURE_AD_REFRESH_TOKEN_KEY); + Configuration conf) throws IOException { + String clientId = getPasswordString(conf, AZURE_AD_CLIENT_ID_KEY); + String refreshToken = getPasswordString(conf, AZURE_AD_REFRESH_TOKEN_KEY); return new RefreshTokenBasedTokenProvider(clientId, refreshToken); } @@ -938,4 +942,21 @@ private static String getNonEmptyVal(Configuration conf, String key) { return value; } + /** + * A wrapper of {@link Configuration#getPassword(String)}. It returns + * String instead of char[]. + * + * @param conf the configuration + * @param key the property key + * @return the password string + * @throws IOException if the password was not found + */ + private static String getPasswordString(Configuration conf, String key) + throws IOException { + char[] passchars = conf.getPassword(key); + if (passchars == null) { + throw new IOException("Password " + key + " not found"); + } + return new String(passchars); + } } diff --git a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md index 00825d101c..ced5cff88e 100644 --- a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md +++ b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md @@ -23,6 +23,7 @@ * [Configuring Credentials & FileSystem](#Configuring_Credentials) * [Using Refresh Token](#Refresh_Token) * [Using Client Keys](#Client_Credential_Token) + * [Protecting the Credentials with Credential Providers](#Credential_Provider) * [Enabling ADL Filesystem](#Enabling_ADL) * [Accessing adl URLs](#Accessing_adl_URLs) * [Testing the hadoop-azure Module](#Testing_the_hadoop-azure_Module) @@ -139,6 +140,49 @@ Add the following properties to your core-site.xml +### Protecting the Credentials with Credential Providers + +In many Hadoop clusters, the core-site.xml file is world-readable. To protect +these credentials from prying eyes, it is recommended that you use the +credential provider framework to securely store them and access them through +configuration. + +All ADLS credential properties can be protected by credential providers. +For additional reading on the credential provider API, see +[Credential Provider API](../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html). + +#### Provisioning + +``` +% hadoop credential create dfs.adls.oauth2.refresh.token -value 123 + -provider localjceks://file/home/foo/adls.jceks +% hadoop credential create dfs.adls.oauth2.credential -value 123 + -provider localjceks://file/home/foo/adls.jceks +``` + +#### Configuring core-site.xml or command line property + +``` + + hadoop.security.credential.provider.path + localjceks://file/home/foo/adls.jceks + Path to interrogate for protected credentials. + +``` + +#### Running DistCp + +``` +% hadoop distcp + [-D hadoop.security.credential.provider.path=localjceks://file/home/user/adls.jceks] + hdfs://:9001/user/foo/007020615 + adl://.azuredatalakestore.net/testDir/ +``` + +NOTE: You may optionally add the provider path property to the distcp command +line instead of added job specific configuration to a generic core-site.xml. +The square brackets above illustrate this capability. + ## Enabling ADL Filesystem diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java index c94e692943..70f2a7f992 100644 --- a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java @@ -18,10 +18,12 @@ package org.apache.hadoop.fs.adl; +import java.io.File; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import org.apache.commons.lang.builder.EqualsBuilder; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider; import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider; @@ -39,15 +41,29 @@ import static org.apache.hadoop.fs.adl.AdlConfKeys .AZURE_AD_TOKEN_PROVIDER_TYPE_KEY; import static org.apache.hadoop.fs.adl.TokenProviderType.*; +import static org.junit.Assert.assertEquals; +import org.apache.hadoop.security.ProviderUtils; +import org.apache.hadoop.security.alias.CredentialProvider; +import org.apache.hadoop.security.alias.CredentialProviderFactory; import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; /** * Test appropriate token provider is loaded as per configuration. */ public class TestAzureADTokenProvider { + private static final String CLIENT_ID = "MY_CLIENT_ID"; + private static final String REFRESH_TOKEN = "MY_REFRESH_TOKEN"; + private static final String CLIENT_SECRET = "MY_CLIENT_SECRET"; + private static final String REFRESH_URL = "http://localhost:8080/refresh"; + + @Rule + public final TemporaryFolder tempDir = new TemporaryFolder(); + @Test public void testRefreshTokenProvider() throws URISyntaxException, IOException { @@ -130,4 +146,140 @@ public void testInvalidProviderConfigurationForClassPath() e.getMessage().contains("wrong.classpath.CustomMockTokenProvider")); } } + + private CredentialProvider createTempCredProvider(Configuration conf) + throws URISyntaxException, IOException { + final File file = tempDir.newFile("test.jks"); + final URI jks = ProviderUtils.nestURIForLocalJavaKeyStoreProvider( + file.toURI()); + conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, + jks.toString()); + return CredentialProviderFactory.getProviders(conf).get(0); + } + + @Test + public void testRefreshTokenWithCredentialProvider() + throws IOException, URISyntaxException { + Configuration conf = new Configuration(); + conf.set(AZURE_AD_CLIENT_ID_KEY, "DUMMY"); + conf.set(AZURE_AD_REFRESH_TOKEN_KEY, "DUMMY"); + conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, RefreshToken); + + CredentialProvider provider = createTempCredProvider(conf); + provider.createCredentialEntry(AZURE_AD_CLIENT_ID_KEY, + CLIENT_ID.toCharArray()); + provider.createCredentialEntry(AZURE_AD_REFRESH_TOKEN_KEY, + REFRESH_TOKEN.toCharArray()); + provider.flush(); + + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + fileSystem.initialize(uri, conf); + RefreshTokenBasedTokenProvider expected = + new RefreshTokenBasedTokenProvider(CLIENT_ID, REFRESH_TOKEN); + Assert.assertTrue(EqualsBuilder.reflectionEquals(expected, + fileSystem.getTokenProvider())); + } + + @Test + public void testRefreshTokenWithCredentialProviderFallback() + throws IOException, URISyntaxException { + Configuration conf = new Configuration(); + conf.set(AZURE_AD_CLIENT_ID_KEY, CLIENT_ID); + conf.set(AZURE_AD_REFRESH_TOKEN_KEY, REFRESH_TOKEN); + conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, RefreshToken); + + createTempCredProvider(conf); + + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + fileSystem.initialize(uri, conf); + RefreshTokenBasedTokenProvider expected = + new RefreshTokenBasedTokenProvider(CLIENT_ID, REFRESH_TOKEN); + Assert.assertTrue(EqualsBuilder.reflectionEquals(expected, + fileSystem.getTokenProvider())); + } + + @Test + public void testClientCredWithCredentialProvider() + throws IOException, URISyntaxException { + Configuration conf = new Configuration(); + conf.set(AZURE_AD_CLIENT_ID_KEY, "DUMMY"); + conf.set(AZURE_AD_CLIENT_SECRET_KEY, "DUMMY"); + conf.set(AZURE_AD_REFRESH_URL_KEY, "DUMMY"); + conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, ClientCredential); + + CredentialProvider provider = createTempCredProvider(conf); + provider.createCredentialEntry(AZURE_AD_CLIENT_ID_KEY, + CLIENT_ID.toCharArray()); + provider.createCredentialEntry(AZURE_AD_CLIENT_SECRET_KEY, + CLIENT_SECRET.toCharArray()); + provider.createCredentialEntry(AZURE_AD_REFRESH_URL_KEY, + REFRESH_URL.toCharArray()); + provider.flush(); + + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + fileSystem.initialize(uri, conf); + ClientCredsTokenProvider expected = new ClientCredsTokenProvider( + REFRESH_URL, CLIENT_ID, CLIENT_SECRET); + Assert.assertTrue(EqualsBuilder.reflectionEquals(expected, + fileSystem.getTokenProvider())); + } + + @Test + public void testClientCredWithCredentialProviderFallback() + throws IOException, URISyntaxException { + Configuration conf = new Configuration(); + conf.set(AZURE_AD_CLIENT_ID_KEY, CLIENT_ID); + conf.set(AZURE_AD_CLIENT_SECRET_KEY, CLIENT_SECRET); + conf.set(AZURE_AD_REFRESH_URL_KEY, REFRESH_URL); + conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, ClientCredential); + + createTempCredProvider(conf); + + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + fileSystem.initialize(uri, conf); + ClientCredsTokenProvider expected = new ClientCredsTokenProvider( + REFRESH_URL, CLIENT_ID, CLIENT_SECRET); + Assert.assertTrue(EqualsBuilder.reflectionEquals(expected, + fileSystem.getTokenProvider())); + } + + @Test + public void testCredentialProviderPathExclusions() throws Exception { + String providerPath = + "user:///,jceks://adl/user/hrt_qa/sqoopdbpasswd.jceks," + + "jceks://hdfs@nn1.example.com/my/path/test.jceks"; + Configuration config = new Configuration(); + config.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, + providerPath); + String newPath = + "user:///,jceks://hdfs@nn1.example.com/my/path/test.jceks"; + + excludeAndTestExpectations(config, newPath); + } + + @Test + public void testExcludeAllProviderTypesFromConfig() throws Exception { + String providerPath = + "jceks://adl/tmp/test.jceks," + + "jceks://adl@/my/path/test.jceks"; + Configuration config = new Configuration(); + config.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, + providerPath); + String newPath = null; + + excludeAndTestExpectations(config, newPath); + } + + void excludeAndTestExpectations(Configuration config, String newPath) + throws Exception { + Configuration conf = ProviderUtils.excludeIncompatibleCredentialProviders( + config, AdlFileSystem.class); + String effectivePath = conf.get( + CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, null); + assertEquals(newPath, effectivePath); + } }