Skip to content

Commit 005030f

Browse files
authored
HADOOP-18610: [ABFS] OAuth2 Token Provider support for Azure Workload Identity (#6787)
Add support for Azure Active Directory (Azure AD) workload identities which integrate with the Kubernetes's native capabilities to federate with any external identity provider. Contributed By: Anuj Modi
1 parent bb30545 commit 005030f

File tree

13 files changed

+594
-27
lines changed

13 files changed

+594
-27
lines changed

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
import org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider;
6060
import org.apache.hadoop.fs.azurebfs.oauth2.RefreshTokenBasedTokenProvider;
6161
import org.apache.hadoop.fs.azurebfs.oauth2.UserPasswordTokenProvider;
62+
import org.apache.hadoop.fs.azurebfs.oauth2.WorkloadIdentityTokenProvider;
6263
import org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenManager;
6364
import org.apache.hadoop.fs.azurebfs.services.AuthType;
6465
import org.apache.hadoop.fs.azurebfs.services.ExponentialRetryPolicy;
@@ -983,6 +984,20 @@ public AccessTokenProvider getTokenProvider() throws TokenAccessProviderExceptio
983984
tokenProvider = new RefreshTokenBasedTokenProvider(authEndpoint,
984985
clientId, refreshToken);
985986
LOG.trace("RefreshTokenBasedTokenProvider initialized");
987+
} else if (tokenProviderClass == WorkloadIdentityTokenProvider.class) {
988+
String authority = appendSlashIfNeeded(
989+
getTrimmedPasswordString(FS_AZURE_ACCOUNT_OAUTH_MSI_AUTHORITY,
990+
AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_MSI_AUTHORITY));
991+
String tenantGuid =
992+
getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_MSI_TENANT);
993+
String clientId =
994+
getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID);
995+
String tokenFile =
996+
getTrimmedPasswordString(FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE,
997+
AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE);
998+
tokenProvider = new WorkloadIdentityTokenProvider(
999+
authority, tenantGuid, clientId, tokenFile);
1000+
LOG.trace("WorkloadIdentityTokenProvider initialized");
9861001
} else {
9871002
throw new IllegalArgumentException("Failed to initialize " + tokenProviderClass);
9881003
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AuthConfigurations.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ public final class AuthConfigurations {
3939
public static final String
4040
DEFAULT_FS_AZURE_ACCOUNT_OAUTH_REFRESH_TOKEN_ENDPOINT =
4141
"https://login.microsoftonline.com/Common/oauth2/token";
42+
/** Default OAuth token file path for the workload identity flow. */
43+
public static final String
44+
DEFAULT_FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE =
45+
"/var/run/secrets/azure/tokens/azure-identity-token";
4246

4347
private AuthConfigurations() {
4448
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,8 @@ public final class ConfigurationKeys {
273273
public static final String FS_AZURE_ACCOUNT_OAUTH_REFRESH_TOKEN = "fs.azure.account.oauth2.refresh.token";
274274
/** Key for oauth AAD refresh token endpoint: {@value}. */
275275
public static final String FS_AZURE_ACCOUNT_OAUTH_REFRESH_TOKEN_ENDPOINT = "fs.azure.account.oauth2.refresh.token.endpoint";
276+
/** Key for oauth AAD workload identity token file path: {@value}. */
277+
public static final String FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE = "fs.azure.account.oauth2.token.file";
276278
/** Key for enabling the tracking of ABFS API latency and sending the latency numbers to the ABFS API service */
277279
public static final String FS_AZURE_ABFS_LATENCY_TRACK = "fs.azure.abfs.latency.track";
278280

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ public final class AzureADAuthenticator {
5757
private static final Logger LOG = LoggerFactory.getLogger(AzureADAuthenticator.class);
5858
private static final String RESOURCE_NAME = "https://storage.azure.com/";
5959
private static final String SCOPE = "https://storage.azure.com/.default";
60+
private static final String JWT_BEARER_ASSERTION = "urn:ietf:params:oauth:client-assertion-type:jwt-bearer";
61+
private static final String CLIENT_CREDENTIALS = "client_credentials";
62+
private static final String OAUTH_VERSION_2_0 = "/oauth2/v2.0/";
6063
private static final int CONNECT_TIMEOUT = 30 * 1000;
6164
private static final int READ_TIMEOUT = 30 * 1000;
6265

@@ -95,22 +98,61 @@ public static AzureADToken getTokenUsingClientCreds(String authEndpoint,
9598
Preconditions.checkNotNull(authEndpoint, "authEndpoint");
9699
Preconditions.checkNotNull(clientId, "clientId");
97100
Preconditions.checkNotNull(clientSecret, "clientSecret");
98-
boolean isVersion2AuthenticationEndpoint = authEndpoint.contains("/oauth2/v2.0/");
99101

100102
QueryParams qp = new QueryParams();
101-
if (isVersion2AuthenticationEndpoint) {
103+
if (isVersion2AuthenticationEndpoint(authEndpoint)) {
102104
qp.add("scope", SCOPE);
103105
} else {
104106
qp.add("resource", RESOURCE_NAME);
105107
}
106-
qp.add("grant_type", "client_credentials");
108+
qp.add("grant_type", CLIENT_CREDENTIALS);
107109
qp.add("client_id", clientId);
108110
qp.add("client_secret", clientSecret);
109111
LOG.debug("AADToken: starting to fetch token using client creds for client ID " + clientId);
110112

111113
return getTokenCall(authEndpoint, qp.serialize(), null, null);
112114
}
113115

116+
/**
117+
* Gets Azure Active Directory token using the client ID and a JWT assertion
118+
* generated by a federated authentication process.
119+
*
120+
* The federation process uses a feature from Azure Active Directory
121+
* called workload identity. A workload identity is an identity used
122+
* by a software workload (such as an application, service, script,
123+
* or container) to authenticate and access other services and resources.
124+
*
125+
*
126+
* @param authEndpoint the OAuth 2.0 token endpoint associated
127+
* with the user's directory (obtain from
128+
* Active Directory configuration)
129+
* @param clientId the client ID (GUID) of the client web app
130+
* obtained from Azure Active Directory configuration
131+
* @param clientAssertion the JWT assertion token
132+
* @return {@link AzureADToken} obtained using the creds
133+
* @throws IOException throws IOException if there is a failure in connecting to Azure AD
134+
*/
135+
public static AzureADToken getTokenUsingJWTAssertion(String authEndpoint,
136+
String clientId, String clientAssertion) throws IOException {
137+
Preconditions.checkNotNull(authEndpoint, "authEndpoint");
138+
Preconditions.checkNotNull(clientId, "clientId");
139+
Preconditions.checkNotNull(clientAssertion, "clientAssertion");
140+
141+
QueryParams qp = new QueryParams();
142+
if (isVersion2AuthenticationEndpoint(authEndpoint)) {
143+
qp.add("scope", SCOPE);
144+
} else {
145+
qp.add("resource", RESOURCE_NAME);
146+
}
147+
qp.add("grant_type", CLIENT_CREDENTIALS);
148+
qp.add("client_id", clientId);
149+
qp.add("client_assertion", clientAssertion);
150+
qp.add("client_assertion_type", JWT_BEARER_ASSERTION);
151+
LOG.debug("AADToken: starting to fetch token using client assertion for client ID " + clientId);
152+
153+
return getTokenCall(authEndpoint, qp.serialize(), null, "POST");
154+
}
155+
114156
/**
115157
* Gets AAD token from the local virtual machine's VM extension. This only works on
116158
* an Azure VM with MSI extension
@@ -523,4 +565,8 @@ private static String consumeInputStream(InputStream inStream, int length) throw
523565

524566
return new String(b, 0, totalBytesRead, StandardCharsets.UTF_8);
525567
}
568+
569+
private static boolean isVersion2AuthenticationEndpoint(String authEndpoint) {
570+
return authEndpoint.contains(OAUTH_VERSION_2_0);
571+
}
526572
}
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.fs.azurebfs.oauth2;
20+
21+
import java.io.File;
22+
import java.io.IOException;
23+
24+
import org.slf4j.Logger;
25+
import org.slf4j.LoggerFactory;
26+
import org.apache.commons.io.FileUtils;
27+
import org.apache.hadoop.classification.VisibleForTesting;
28+
import org.apache.hadoop.thirdparty.com.google.common.base.Strings;
29+
import org.apache.hadoop.util.Preconditions;
30+
31+
/**
32+
* Provides tokens based on Azure AD Workload Identity.
33+
*/
34+
public class WorkloadIdentityTokenProvider extends AccessTokenProvider {
35+
36+
private static final String OAUTH2_TOKEN_PATH = "/oauth2/v2.0/token";
37+
/** Logger for this provider; bound to this class so its messages are attributed to it rather than to the base class. */
private static final Logger LOG = LoggerFactory.getLogger(WorkloadIdentityTokenProvider.class);
38+
private static final String EMPTY_TOKEN_FILE_ERROR = "Empty token file found at specified path: ";
39+
private static final String TOKEN_FILE_READ_ERROR = "Error reading token file at specified path: ";
40+
41+
private final String authEndpoint;
42+
private final String clientId;
43+
private final String tokenFile;
44+
private long tokenFetchTime = -1;
45+
46+
public WorkloadIdentityTokenProvider(final String authority, final String tenantId,
47+
final String clientId, final String tokenFile) {
48+
Preconditions.checkNotNull(authority, "authority");
49+
Preconditions.checkNotNull(tenantId, "tenantId");
50+
Preconditions.checkNotNull(clientId, "clientId");
51+
Preconditions.checkNotNull(tokenFile, "tokenFile");
52+
53+
this.authEndpoint = authority + tenantId + OAUTH2_TOKEN_PATH;
54+
this.clientId = clientId;
55+
this.tokenFile = tokenFile;
56+
}
57+
58+
@Override
59+
protected AzureADToken refreshToken() throws IOException {
60+
LOG.debug("AADToken: refreshing token from JWT Assertion");
61+
String clientAssertion = getClientAssertion();
62+
AzureADToken token = getTokenUsingJWTAssertion(clientAssertion);
63+
tokenFetchTime = System.currentTimeMillis();
64+
return token;
65+
}
66+
67+
/**
68+
* Checks if the token is about to expire as per base expiry logic.
69+
* Otherwise, expire if there is a clock skew issue in the system.
70+
*
71+
* @return true if a token has never been fetched, if the base expiry check
72+
* reports the token as about to expire, or if the system clock has moved
* backwards since the last token fetch
73+
*/
74+
@Override
75+
protected boolean isTokenAboutToExpire() {
76+
if (tokenFetchTime == -1 || super.isTokenAboutToExpire()) {
77+
return true;
78+
}
79+
80+
// In case of any clock skew issues, refresh the token.
81+
long elapsedTimeSinceLastTokenRefreshInMillis =
82+
System.currentTimeMillis() - tokenFetchTime;
83+
boolean expiring = elapsedTimeSinceLastTokenRefreshInMillis < 0;
84+
if (expiring) {
85+
// Clock Skew issue. Refresh token.
86+
LOG.debug("JWTToken: token renewing. Time elapsed since last token fetch:"
87+
+ " {} milliseconds", elapsedTimeSinceLastTokenRefreshInMillis);
88+
}
89+
90+
return expiring;
91+
}
92+
93+
/**
94+
* Gets the client assertion from the token file.
95+
* The token file should contain the client assertion in JWT format.
96+
* It should be a String containing Base64Url encoded JSON Web Token (JWT).
97+
* See <a href="https://azure.github.io/azure-workload-identity/docs/faq.html#does-workload-identity-work-in-disconnected-environments">
98+
* Azure Workload Identity FAQ</a>.
99+
*
100+
* @return the client assertion.
101+
* @throws IOException if the token file cannot be read or is empty.
102+
*/
103+
private String getClientAssertion()
104+
throws IOException {
105+
String clientAssertion = "";
106+
try {
107+
File file = new File(tokenFile);
108+
clientAssertion = FileUtils.readFileToString(file, "UTF-8");
109+
} catch (Exception e) {
110+
throw new IOException(TOKEN_FILE_READ_ERROR + tokenFile, e);
111+
}
112+
if (Strings.isNullOrEmpty(clientAssertion)) {
113+
throw new IOException(EMPTY_TOKEN_FILE_ERROR + tokenFile);
114+
}
115+
return clientAssertion;
116+
}
117+
118+
/**
119+
* Gets the Azure AD token from a client assertion in JWT format.
120+
* This method exists to make unit testing possible.
121+
*
122+
* @param clientAssertion the client assertion.
123+
* @return the Azure AD token.
124+
* @throws IOException if there is a failure in connecting to Azure AD.
125+
*/
126+
@VisibleForTesting
127+
AzureADToken getTokenUsingJWTAssertion(String clientAssertion) throws IOException {
128+
return AzureADAuthenticator
129+
.getTokenUsingJWTAssertion(authEndpoint, clientId, clientAssertion);
130+
}
131+
132+
/**
133+
* Returns the time at which the access token was last refreshed (-1 if it has never been fetched).
134+
* This method exists to make unit testing possible.
135+
*
136+
* @return the time the token was last fetched.
137+
*/
138+
@VisibleForTesting
139+
long getTokenFetchTime() {
140+
return tokenFetchTime;
141+
}
142+
}

hadoop-tools/hadoop-azure/src/site/markdown/abfs.md

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -321,10 +321,9 @@ What can be changed is what secrets/credentials are used to authenticate the cal
321321

322322
The authentication mechanism is set in `fs.azure.account.auth.type` (or the
323323
account specific variant). The possible values are SharedKey, OAuth, Custom
324-
and SAS. For the various OAuth options use the config `fs.azure.account
325-
.oauth.provider.type`. Following are the implementations supported
326-
ClientCredsTokenProvider, UserPasswordTokenProvider, MsiTokenProvider and
327-
RefreshTokenBasedTokenProvider. An IllegalArgumentException is thrown if
324+
and SAS. For the various OAuth options use the config `fs.azure.account.oauth.provider.type`. The following implementations are supported:
325+
ClientCredsTokenProvider, UserPasswordTokenProvider, MsiTokenProvider,
326+
RefreshTokenBasedTokenProvider and WorkloadIdentityTokenProvider. An IllegalArgumentException is thrown if
328327
the specified provider type is not one of the supported.
329328

330329
All secrets can be stored in JCEKS files. These are encrypted and password
@@ -561,6 +560,54 @@ The Azure Portal/CLI is used to create the service identity.
561560
</property>
562561
```
563562

563+
### <a name="workload-identity"></a> Azure Workload Identity
564+
565+
[Azure Workload Identities](https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/overview), formerly known as "Azure AD pod identity", can be used to authenticate workloads running in Kubernetes pods.
566+
567+
OAuth 2.0 tokens are written to a file that is only accessible
568+
from the executing pod (`/var/run/secrets/azure/tokens/azure-identity-token`).
569+
The issued credentials can be used to authenticate.
570+
571+
The Azure Portal/CLI is used to create the service identity.
572+
573+
```xml
574+
<property>
575+
<name>fs.azure.account.auth.type</name>
576+
<value>OAuth</value>
577+
<description>
578+
Use OAuth authentication
579+
</description>
580+
</property>
581+
<property>
582+
<name>fs.azure.account.oauth.provider.type</name>
583+
<value>org.apache.hadoop.fs.azurebfs.oauth2.WorkloadIdentityTokenProvider</value>
584+
<description>
585+
Use Workload Identity for issuing OAuth tokens
586+
</description>
587+
</property>
588+
<property>
589+
<name>fs.azure.account.oauth2.msi.tenant</name>
590+
<value>${env.AZURE_TENANT_ID}</value>
591+
<description>
592+
Tenant ID (mandatory for this provider)
593+
</description>
594+
</property>
595+
<property>
596+
<name>fs.azure.account.oauth2.client.id</name>
597+
<value>${env.AZURE_CLIENT_ID}</value>
598+
<description>
599+
Client ID (mandatory for this provider)
600+
</description>
601+
</property>
602+
<property>
603+
<name>fs.azure.account.oauth2.token.file</name>
604+
<value>${env.AZURE_FEDERATED_TOKEN_FILE}</value>
605+
<description>
606+
Token file path (optional; defaults to /var/run/secrets/azure/tokens/azure-identity-token)
607+
</description>
608+
</property>
609+
```
610+
564611
### Custom OAuth 2.0 Token Provider
565612

566613
A Custom OAuth 2.0 token provider supplies the ABFS connector with an OAuth 2.0

hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,42 @@ hierarchical namespace enabled, and set the following configuration settings:
879879
</property>
880880
-->
881881
882+
<!--2.5. If "WorkloadIdentityTokenProvider" is set as key provider, uncomment below and
883+
set tenant, client id and token file path.
884+
885+
All service principals must have federated identity credentials for Kubernetes.
886+
See Azure docs: https://learn.microsoft.com/en-us/azure/active-directory/workload-identities/workload-identity-federation-create-trust?pivots=identity-wif-apps-methods-azp#kubernetes
887+
888+
Retrieve the Azure identity token from kubernetes:
889+
1. Create AKS cluster with Workload Identity: https://learn.microsoft.com/en-us/azure/aks/workload-identity-deploy-cluster
890+
2. Create the pod:
891+
kubectl apply -f src/test/resources/workload-identity-pod.yaml
892+
3. After the pod is running, retrieve the identity token from the pod logs:
893+
kubectl logs pod/workload-identity
894+
4. Save the identity token to the token file path specified below.
895+
896+
The Azure identity token expires after 1 hour.
897+
-->
898+
<!--
899+
<property>
900+
<name>fs.azure.account.oauth2.msi.tenant.{ABFS_ACCOUNT_NAME}</name>
901+
<value>{tenantGuid}</value>
902+
<description>msi tenantGuid.</description>
903+
</property>
904+
905+
<property>
906+
<name>fs.azure.account.oauth2.client.id.{ABFS_ACCOUNT_NAME}</name>
907+
<value>{client id}</value>
908+
<description>AAD client id.</description>
909+
</property>
910+
911+
<property>
912+
<name>fs.azure.account.oauth2.token.file.{ABFS_ACCOUNT_NAME}</name>
913+
<value>{token file path}</value>
914+
<description>Azure identity token file path.</description>
915+
</property>
916+
-->
917+
882918
<!--
883919
<property>
884920
<name>fs.azure.identity.transformer.enable.short.name</name>

0 commit comments

Comments
 (0)