Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
293 changes: 217 additions & 76 deletions csharp/src/Drivers/BigQuery/BigQueryConnection.cs

Large diffs are not rendered by default.

16 changes: 14 additions & 2 deletions csharp/src/Drivers/BigQuery/BigQueryParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ namespace Apache.Arrow.Adbc.Drivers.BigQuery
/// <summary>
/// Parameters used for connecting to BigQuery data sources.
/// </summary>
public class BigQueryParameters
internal class BigQueryParameters
{
public const string AccessToken = "adbc.bigquery.access_token";
public const string AudienceUri = "adbc.bigquery.audience_uri";
Comment thread
davidhcoe marked this conversation as resolved.
public const string ProjectId = "adbc.bigquery.project_id";
public const string BillingProjectId = "adbc.bigquery.billing_project_id";
public const string ClientId = "adbc.bigquery.client_id";
Expand All @@ -36,6 +38,8 @@ public class BigQueryParameters
public const string Scopes = "adbc.bigquery.scopes";
public const string IncludeConstraintsWithGetObjects = "adbc.bigquery.include_constraints_getobjects";
public const string ClientTimeout = "adbc.bigquery.client.timeout";
public const string MaximumRetryAttempts = "adbc.bigquery.maximum_retries";
public const string RetryDelayMs = "adbc.bigquery.retry_delay_ms";
Comment thread
davidhcoe marked this conversation as resolved.
public const string GetQueryResultsOptionsTimeout = "adbc.bigquery.get_query_results_options.timeout";
public const string MaxFetchConcurrency = "adbc.bigquery.max_fetch_concurrency";
public const string IncludePublicProjectId = "adbc.bigquery.include_public_project_id";
Expand All @@ -47,13 +51,21 @@ public class BigQueryParameters
/// <summary>
/// Constants used for default parameter values.
/// </summary>
public class BigQueryConstants
internal class BigQueryConstants
{
public const string UserAuthenticationType = "user";
public const string EntraIdAuthenticationType = "aad";
public const string ServiceAccountAuthenticationType = "service";
public const string TokenEndpoint = "https://accounts.google.com/o/oauth2/token";
public const string TreatLargeDecimalAsString = "true";

// Entra ID / Azure AD constants
public const string EntraGrantType = "urn:ietf:params:oauth:grant-type:token-exchange";
public const string EntraSubjectTokenType = "urn:ietf:params:oauth:token-type:id_token";
public const string EntraRequestedTokenType = "urn:ietf:params:oauth:token-type:access_token";
public const string EntraIdScope = "https://www.googleapis.com/auth/cloud-platform";
public const string EntraStsTokenEndpoint = "https://sts.googleapis.com/v1/token";

// default value per https://pkg.go.dev/cloud.google.com/go/bigquery#section-readme
public const string DetectProjectId = "*detect-project-id*";
}
Expand Down
214 changes: 157 additions & 57 deletions csharp/src/Drivers/BigQuery/BigQueryStatement.cs

Large diffs are not rendered by default.

39 changes: 39 additions & 0 deletions csharp/src/Drivers/BigQuery/BigQueryStsTokenResponse.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System.Text.Json.Serialization;

namespace Apache.Arrow.Adbc.Drivers.BigQuery
{
/// <summary>
/// The token response from BigQuery
/// </summary>
internal class BigQueryStsTokenResponse
{
[JsonPropertyName("access_token")]
public string? AccessToken { get; set; }

[JsonPropertyName("issued_token_type")]
public string? IssuedTokenType { get; set; }

[JsonPropertyName("token_type")]
public string? TokenType { get; set; }

[JsonPropertyName("expires_in")]
public int? ExpiresIn { get; set; }
}
}
4 changes: 1 addition & 3 deletions csharp/src/Drivers/BigQuery/BigQueryTableTypes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System.Collections.Generic;

namespace Apache.Arrow.Adbc.Drivers.BigQuery
{
internal static class BigQueryTableTypes
{
public static readonly string[] TableTypes = new string[]{ "BASE TABLE", "VIEW", "CLONE", "SNAPSHOT" };
public static readonly string[] TableTypes = new string[] { "BASE TABLE", "VIEW", "CLONE", "SNAPSHOT" };
}
}
37 changes: 37 additions & 0 deletions csharp/src/Drivers/BigQuery/BigQueryUtils.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System;
using Google;

namespace Apache.Arrow.Adbc.Drivers.BigQuery
{
internal class BigQueryUtils
{
public static bool TokenRequiresUpdate(Exception ex)
{
bool result = false;

if (ex is GoogleApiException gaex && gaex.HttpStatusCode == System.Net.HttpStatusCode.Unauthorized)
{
result = true;
}

return result;
}
}
}
40 changes: 40 additions & 0 deletions csharp/src/Drivers/BigQuery/ITokenProtectedResource.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System;
using System.Threading.Tasks;

namespace Apache.Arrow.Adbc.Drivers.BigQuery
{
/// <summary>
/// Common interface for a token protected resource.
/// </summary>
internal interface ITokenProtectedResource
{
/// <summary>
/// The function to call when updating the token.
/// </summary>
Func<Task>? UpdateToken { get; set; }

/// <summary>
/// Determines the token needs to be updated.
/// </summary>
/// <param name="ex">The exception that occurs.</param>
/// <returns>True/False indicating a refresh is needed.</returns>
bool TokenRequiresUpdate(Exception ex);
}
}
82 changes: 82 additions & 0 deletions csharp/src/Drivers/BigQuery/RetryManager.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System;
using System.Threading.Tasks;

namespace Apache.Arrow.Adbc.Drivers.BigQuery
{
/// <summary>
/// Class that will retry calling a method with a backoff.
/// </summary>
internal class RetryManager
{
public static async Task<T> ExecuteWithRetriesAsync<T>(
ITokenProtectedResource tokenProtectedResource,
Func<Task<T>> action,
int maxRetries = 5,
int initialDelayMilliseconds = 200)
{
if (action == null)
{
throw new AdbcException("There is no method to retry", AdbcStatusCode.InvalidArgument);
}

int retryCount = 0;
int delay = initialDelayMilliseconds;

while (retryCount < maxRetries)
{
try
{
T result = await action();
return result;
}
catch (Exception ex)
{
retryCount++;
if (retryCount >= maxRetries)
{
if ((tokenProtectedResource?.UpdateToken != null))
{
if (tokenProtectedResource?.TokenRequiresUpdate(ex) == true)
{
throw new AdbcException($"Cannot update access token after {maxRetries} tries", AdbcStatusCode.Unauthenticated, ex);
}
}

throw new AdbcException($"Cannot execute {action.Method.Name} after {maxRetries} tries", AdbcStatusCode.UnknownError, ex);
}

if ((tokenProtectedResource?.UpdateToken != null))
{
if (tokenProtectedResource.TokenRequiresUpdate(ex) == true)
{
await tokenProtectedResource.UpdateToken();
}
}

await Task.Delay(delay);
delay = Math.Min(2 * delay, 5000);
}
}

throw new AdbcException($"Could not successfully call {action.Method.Name}", AdbcStatusCode.UnknownError);
}
}
}
60 changes: 60 additions & 0 deletions csharp/src/Drivers/BigQuery/TokenProtectedReadClient.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

using System;
using System.Threading.Tasks;
using Google.Apis.Auth.OAuth2;
using Google.Cloud.BigQuery.Storage.V1;

namespace Apache.Arrow.Adbc.Drivers.BigQuery
{
/// <summary>
/// Manages a <see cref="BigQueryReadClient"/> that is protected by a token.
/// </summary>
internal class TokenProtectedReadClientManger : ITokenProtectedResource
{
BigQueryReadClient bigQueryReadClient;

public TokenProtectedReadClientManger(GoogleCredential credential)
{
UpdateCredential(credential);

if (bigQueryReadClient == null)
{
throw new InvalidOperationException("could not create a read client");
}
}

public BigQueryReadClient ReadClient => bigQueryReadClient;

public void UpdateCredential(GoogleCredential? credential)
{
if (credential == null)
{
throw new ArgumentNullException(nameof(credential));
}

BigQueryReadClientBuilder readClientBuilder = new BigQueryReadClientBuilder();
readClientBuilder.Credential = credential;
this.bigQueryReadClient = readClientBuilder.Build();
}

public Func<Task>? UpdateToken { get; set; }

public bool TokenRequiresUpdate(Exception ex) => BigQueryUtils.TokenRequiresUpdate(ex);
}
}
38 changes: 32 additions & 6 deletions csharp/src/Drivers/BigQuery/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,17 @@ The ADBC driver passes the configured credentials to BigQuery, but you may need

The following parameters can be used to configure the driver behavior. The parameters are case sensitive.

**adbc.bigquery.access_token**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Sets the access token to use as the credential. Currently, this is for Microsoft Entra, but this could be used for other OAuth implementations as well.

**adbc.bigquery.audience_uri**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Sets the audience URI for the authentication token. Currently, this is for Microsoft Entra, but this could be used for other OAuth implementations as well.

**adbc.bigquery.allow_large_results**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Sets the [AllowLargeResults](https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.V2/latest/Google.Cloud.BigQuery.V2.QueryOptions#Google_Cloud_BigQuery_V2_QueryOptions_AllowLargeResults) value of the QueryOptions to `true` if configured; otherwise, the default is `false`.

**adbc.bigquery.auth_type**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Required. Must be `user` or `service`

https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.V2/latest/Google.Cloud.BigQuery.V2.QueryOptions#Google_Cloud_BigQuery_V2_QueryOptions_AllowLargeResults
&nbsp;&nbsp;&nbsp;&nbsp;Required. Must be `user`, `aad` (for Microsoft Entra) or `service`.

**adbc.bigquery.billing_project_id**<br>
&nbsp;&nbsp;&nbsp;&nbsp;The [Project ID](https://cloud.google.com/resource-manager/docs/creating-managing-projects) used for accessing billing BigQuery. If not specified, will default to the detected project ID.
Expand All @@ -60,6 +64,9 @@ https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.V2/latest/G
**adbc.bigquery.get_query_results_options.timeout**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Optional. Sets the timeout (in seconds) for the GetQueryResultsOptions value. If not set, defaults to 5 minutes. Similar to a CommandTimeout.

**adbc.bigquery.maximum_retries**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Optional. The maximum number of retries. Defaults to 5.

**adbc.bigquery.max_fetch_concurrency**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Optional. Sets the [maxStreamCount](https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.Storage.V1/latest/Google.Cloud.BigQuery.Storage.V1.BigQueryReadClient#Google_Cloud_BigQuery_Storage_V1_BigQueryReadClient_CreateReadSession_System_String_Google_Cloud_BigQuery_Storage_V1_ReadSession_System_Int32_Google_Api_Gax_Grpc_CallSettings_) for the CreateReadSession method. If not set, defaults to 1.

Expand All @@ -75,18 +82,21 @@ https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.V2/latest/G
**adbc.bigquery.include_constraints_getobjects**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Optional. Some callers do not need the constraint details when they get the table information and can improve the speed of obtaining the results. Setting this value to `"false"` will not include the constraint details. The default value is `"true"`.

**adbc.bigquery.include_public_project_id**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Include the `bigquery-public-data` project ID with the list of project IDs.

**adbc.bigquery.large_results_destination_table**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Optional. Sets the [DestinationTable](https://cloud.google.com/dotnet/docs/reference/Google.Cloud.BigQuery.V2/latest/Google.Cloud.BigQuery.V2.QueryOptions#Google_Cloud_BigQuery_V2_QueryOptions_DestinationTable) value of the QueryOptions if configured. Expects the format to be `{projectId}.{datasetId}.{tableId}` to set the corresponding values in the [TableReference](https://github.com/googleapis/google-api-dotnet-client/blob/6c415c73788b848711e47c6dd33c2f93c76faf97/Src/Generated/Google.Apis.Bigquery.v2/Google.Apis.Bigquery.v2.cs#L9348) class.

**adbc.bigquery.project_id**<br>
&nbsp;&nbsp;&nbsp;&nbsp;The [Project ID](https://cloud.google.com/resource-manager/docs/creating-managing-projects) used for accessing BigQuery. If not specified, will default to detect the projectIds the credentials have access to.

**adbc.bigquery.include_public_project_id**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Include the `bigquery-public-data` project ID with the list of project IDs.

**adbc.bigquery.refresh_token**<br>
&nbsp;&nbsp;&nbsp;&nbsp;The refresh token used for when the generated OAuth token expires. Required for `user` authentication.

**adbc.bigquery.retry_delay_ms**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Optional The delay between retries. Defaults to 200ms. The retries could take up to `adbc.bigquery.maximum_retries` x `adbc.bigquery.retry_delay_ms` to complete.

**adbc.bigquery.scopes**<br>
&nbsp;&nbsp;&nbsp;&nbsp;Optional. Comma separated list of scopes to include for the credential.

Expand Down Expand Up @@ -119,3 +129,19 @@ The following table depicts how the BigQuery ADBC driver converts a BigQuery typ
+A JSON string

See [Arrow Schema Details](https://cloud.google.com/bigquery/docs/reference/storage/#arrow_schema_details) for how BigQuery handles Arrow types.

## Microsoft Entra
The driver supports authenticating with a [Microsoft Entra](https://learn.microsoft.com/en-us/entra/fundamentals/what-is-entra) ID. For long running operations, the Entra token may timeout if the operation takes longer than the Entra token's lifetime. The driver has the ability to perform token refreshes by subscribing to the `UpdateToken` delegate on the `BigQueryConnection`. In this scenario, the driver will attempt to perform an operation. If that operation fails due to an Unauthorized error, then the token will be refreshed via the `UpdateToken` delegate.

Sample code to refresh the token:

```
Dictionary<string,string> properties = ...;
BigQueryConnection connection = new BigQueryConnection(properties);
connection.UpdateToken = () => Task.Run(() =>
{
connection.SetOption(BigQueryParameters.AccessToken, GetAccessToken());
});
```

In the sample above, when a new token is needed, the delegate is invoked and updates the `adbc.bigquery.access_token` parameter on the connection object.
Loading