diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index ffcab2cc26..7c4b0f1f98 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -2586,11 +2586,16 @@
ClientCredential
Defines Azure Active Directory OAuth2 access token provider type.
- Supported types are ClientCredential, RefreshToken, and Custom.
+ Supported types are ClientCredential, RefreshToken, MSI, DeviceCode,
+ and Custom.
The ClientCredential type requires property fs.adl.oauth2.client.id,
fs.adl.oauth2.credential, and fs.adl.oauth2.refresh.url.
The RefreshToken type requires property fs.adl.oauth2.client.id and
fs.adl.oauth2.refresh.token.
+ The MSI type requires properties fs.adl.oauth2.msi.port and
+ fs.adl.oauth2.msi.tenantguid.
+ The DeviceCode type requires property
+ fs.adl.oauth2.devicecode.clientapp.id.
The Custom type requires property fs.adl.oauth2.access.token.provider.
@@ -2627,6 +2632,36 @@
+
+ fs.adl.oauth2.msi.port
+
+
+ The localhost port for the MSI token service. This is the port specified
+ when creating the Azure VM.
+ Used by MSI token provider.
+
+
+
+
+ fs.adl.oauth2.msi.tenantguid
+
+
+ The tenant guid for the Azure AAD tenant under which the azure data lake
+ store account is created.
+ Used by MSI token provider.
+
+
+
+
+ fs.adl.oauth2.devicecode.clientapp.id
+
+
+ The app id of the AAD native app in whose context the auth request
+ should be made.
+ Used by DeviceCode token provider.
+
+
+
diff --git a/hadoop-tools/hadoop-azure-datalake/pom.xml b/hadoop-tools/hadoop-azure-datalake/pom.xml
index 3aed5e1135..47f12df7f0 100644
--- a/hadoop-tools/hadoop-azure-datalake/pom.xml
+++ b/hadoop-tools/hadoop-azure-datalake/pom.xml
@@ -110,7 +110,7 @@
com.microsoft.azure
azure-data-lake-store-sdk
- 2.1.4
+ 2.2.1
diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java
index 31df22254c..f77d98100c 100644
--- a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java
+++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java
@@ -54,6 +54,14 @@ public final class AdlConfKeys {
public static final String TOKEN_PROVIDER_TYPE_CLIENT_CRED =
"ClientCredential";
+ // MSI Auth Configuration
+ public static final String MSI_PORT = "fs.adl.oauth2.msi.port";
+ public static final String MSI_TENANT_GUID = "fs.adl.oauth2.msi.tenantguid";
+
+ // DeviceCode Auth configuration
+ public static final String DEVICE_CODE_CLIENT_APP_ID =
+ "fs.adl.oauth2.devicecode.clientapp.id";
+
public static final String READ_AHEAD_BUFFER_SIZE_KEY =
"adl.feature.client.cache.readahead";
diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java
index 76ce43eb6d..a5e31e153c 100644
--- a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java
+++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java
@@ -34,6 +34,8 @@
import com.microsoft.azure.datalake.store.UserGroupRepresentation;
import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.ClientCredsTokenProvider;
+import com.microsoft.azure.datalake.store.oauth2.DeviceCodeTokenProvider;
+import com.microsoft.azure.datalake.store.oauth2.MsiTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.RefreshTokenBasedTokenProvider;
import org.apache.commons.lang.StringUtils;
@@ -254,6 +256,12 @@ private AccessTokenProvider getAccessTokenProvider(Configuration config)
case ClientCredential:
tokenProvider = getConfCredentialBasedTokenProvider(conf);
break;
+ case MSI:
+ tokenProvider = getMsiBasedTokenProvider(conf);
+ break;
+ case DeviceCode:
+ tokenProvider = getDeviceCodeTokenProvider(conf);
+ break;
case Custom:
default:
AzureADTokenProvider azureADTokenProvider = getCustomAccessTokenProvider(
@@ -280,6 +288,19 @@ private AccessTokenProvider getConfRefreshTokenBasedTokenProvider(
return new RefreshTokenBasedTokenProvider(clientId, refreshToken);
}
+ private AccessTokenProvider getMsiBasedTokenProvider(
+ Configuration conf) throws IOException {
+ int port = Integer.parseInt(getNonEmptyVal(conf, MSI_PORT));
+ String tenantGuid = getPasswordString(conf, MSI_TENANT_GUID);
+ return new MsiTokenProvider(port, tenantGuid);
+ }
+
+ private AccessTokenProvider getDeviceCodeTokenProvider(
+ Configuration conf) throws IOException {
+ String clientAppId = getNonEmptyVal(conf, DEVICE_CODE_CLIENT_APP_ID);
+ return new DeviceCodeTokenProvider(clientAppId);
+ }
+
@VisibleForTesting
AccessTokenProvider getTokenProvider() {
return tokenProvider;
diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/TokenProviderType.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/TokenProviderType.java
index 9fd4f4f46b..1c11d848dc 100644
--- a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/TokenProviderType.java
+++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/TokenProviderType.java
@@ -21,5 +21,7 @@
enum TokenProviderType {
RefreshToken,
ClientCredential,
+ MSI,
+ DeviceCode,
Custom
}
diff --git a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
index d4b7d8ef95..e34da36d56 100644
--- a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
+++ b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md
@@ -111,20 +111,24 @@ service associated with the client id. See [*Active Directory Library For Java*]
##### Generating the Service Principal
1. Go to [the portal](https://portal.azure.com)
-2. Under "Browse", look for Active Directory and click on it.
-3. Create "Web Application". Remember the name you create here - that is what you will add to your ADL account as authorized user.
+2. Under services in left nav, look for Azure Active Directory and click it.
+3. Using "App Registrations" in the menu, create "Web Application". Remember
+ the name you create here - that is what you will add to your ADL account
+ as authorized user.
4. Go through the wizard
-5. Once app is created, Go to app configuration, and find the section on "keys"
+5. Once app is created, go to "keys" under "settings" for the app
6. Select a key duration and hit save. Save the generated keys.
-7. Note down the properties you will need to auth:
- - The client ID
+7. Go back to the App Registrations page, and click on the "Endpoints" button
+ at the top
+ a. Note down the "Token Endpoint" URL
+8. Note down the properties you will need to auth:
+ - The "Application ID" of the Web App you created above
- The key you just generated above
- - The token endpoint (select "View endpoints" at the bottom of the page and copy/paste the OAuth2 .0 Token Endpoint value)
- - Resource: Always https://management.core.windows.net/ , for all customers
+ - The token endpoint
##### Adding the service principal to your ADL Account
1. Go to the portal again, and open your ADL account
-2. Select Users under Settings
+2. Select `Access control (IAM)`
3. Add your user name you created in Step 6 above (note that it does not show up in the list, but will be found if you searched for the name)
4. Add "Owner" role
@@ -153,6 +157,84 @@ Add the following properties to your `core-site.xml`
```
+#### Using MSI (Managed Service Identity)
+
+Azure VMs can be provisioned with "service identities" that are managed by the
+Identity extension within the VM. The advantage of doing this is that the
+credentials are managed by the extension, and do not have to be put into
+core-site.xml.
+
+To use MSI, the following two steps are needed:
+1. Modify the VM deployment template to specify the port number of the token
+ service exposed to localhost by the identity extension in the VM.
+2. Get your Azure ActiveDirectory Tenant ID:
+ 1. Go to [the portal](https://portal.azure.com)
+ 2. Under services in left nav, look for Azure Active Directory and click on it.
+ 3. Click on Properties
+ 4. Note down the GUID shown under "Directory ID" - this is your AAD tenant ID
+
+
+##### Configure core-site.xml
+Add the following properties to your `core-site.xml`
+
+```xml
+
+ fs.adl.oauth2.access.token.provider.type
+ Msi
+
+
+
+ fs.adl.oauth2.msi.port
+ PORT NUMBER FROM STEP 1 ABOVE
+
+
+
+ fs.adl.oauth2.msi.TenantGuid
+ AAD TENANT ID GUID FROM STEP 2 ABOVE
+
+```
+
+### Using Device Code Auth for interactive login
+
+**Note:** This auth method is suitable for running interactive tools, but will
+not work for jobs submitted to a cluster.
+
+To use user-based login, Azure ActiveDirectory provides login flow using
+device code.
+
+To use device code flow, user must first create a **Native** app registration
+in the Azure portal, and provide the client ID for the app as a config. Here
+are the steps:
+
+1. Go to [the portal](https://portal.azure.com)
+2. Under services in left nav, look for Azure Active Directory and click on it.
+3. Using "App Registrations" in the menu, create "Native Application".
+4. Go through the wizard
+5. Once app is created, note down the "Appplication ID" of the app
+6. Grant permissions to the app:
+ 1. Click on "Permissions" for the app, and then add "Azure Data Lake" and
+ "Windows Azure Service Management API" permissions
+ 2. Click on "Grant Permissions" to add the permissions to the app
+
+Add the following properties to your `core-site.xml`
+
+```xml
+
+ fs.adl.oauth2.devicecode.clientappid
+ APP ID FROM STEP 5 ABOVE
+
+```
+
+It is usually not desirable to add DeviceCode as the default token provider
+type. But it can be used when using a local command:
+```
+ hadoop fs -Dfs.adl.oauth2.access.token.provider.type=DeviceCode -ls ...
+```
+Running this will print a URL and device code that can be used to login from
+any browser (even on a different machine, outside of the ssh session). Once
+the login is done, the command continues.
+
+
#### Protecting the Credentials with Credential Providers
In many Hadoop clusters, the `core-site.xml` file is world-readable. To protect
diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java
index 36498c6696..929b33a042 100644
--- a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java
+++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java
@@ -23,6 +23,8 @@
import java.net.URI;
import java.net.URISyntaxException;
+import com.microsoft.azure.datalake.store.oauth2.DeviceCodeTokenProvider;
+import com.microsoft.azure.datalake.store.oauth2.MsiTokenProvider;
import org.apache.commons.lang.builder.EqualsBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider;
@@ -40,6 +42,9 @@
.AZURE_AD_TOKEN_PROVIDER_CLASS_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_TYPE_KEY;
+import static org.apache.hadoop.fs.adl.AdlConfKeys.DEVICE_CODE_CLIENT_APP_ID;
+import static org.apache.hadoop.fs.adl.AdlConfKeys.MSI_PORT;
+import static org.apache.hadoop.fs.adl.AdlConfKeys.MSI_TENANT_GUID;
import static org.apache.hadoop.fs.adl.TokenProviderType.*;
import static org.junit.Assert.assertEquals;
@@ -97,6 +102,41 @@ public void testClientCredTokenProvider()
Assert.assertTrue(tokenProvider instanceof ClientCredsTokenProvider);
}
+ @Test
+ public void testMSITokenProvider()
+ throws IOException, URISyntaxException {
+ Configuration conf = new Configuration();
+ conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, MSI);
+ conf.set(MSI_PORT, "54321");
+ conf.set(MSI_TENANT_GUID, "TENANT_GUID");
+
+ URI uri = new URI("adl://localhost:8080");
+ AdlFileSystem fileSystem = new AdlFileSystem();
+ fileSystem.initialize(uri, conf);
+ AccessTokenProvider tokenProvider = fileSystem.getTokenProvider();
+ Assert.assertTrue(tokenProvider instanceof MsiTokenProvider);
+ }
+
+ @Test
+ public void testDeviceCodeTokenProvider()
+ throws IOException, URISyntaxException {
+ boolean runTest = false;
+ if (runTest) {
+ // Device code auth method causes an interactive prompt, so run this only
+ // when running the test interactively at a local terminal. Disabling
+ // test by default, to not break any automation.
+ Configuration conf = new Configuration();
+ conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, DeviceCode);
+ conf.set(DEVICE_CODE_CLIENT_APP_ID, "CLIENT_APP_ID_GUID");
+
+ URI uri = new URI("adl://localhost:8080");
+ AdlFileSystem fileSystem = new AdlFileSystem();
+ fileSystem.initialize(uri, conf);
+ AccessTokenProvider tokenProvider = fileSystem.getTokenProvider();
+ Assert.assertTrue(tokenProvider instanceof DeviceCodeTokenProvider);
+ }
+ }
+
@Test
public void testCustomCredTokenProvider()
throws URISyntaxException, IOException {