|
1 |
| -from google.auth import credentials |
2 |
| -from google.cloud import bigquery |
3 |
| -from google.cloud.bigquery._http import Connection |
4 | 1 | import os
|
5 | 2 |
|
6 |
| - |
7 |
| -class KaggleKernelCredentials(credentials.Credentials): |
8 |
| - """Custom Credentials used to authenticate using the Kernel's connected OAuth account.""" |
9 |
| - |
10 |
| - def refresh(self, request): |
11 |
| - print("Calling Kaggle.UserSecrets to refresh token.") |
12 |
| - # Set self.token and self.expiry here. |
13 |
| - raise NotImplementedError("Private BigQuery integration is not yet implemented.") |
14 |
| - |
15 | 3 | kaggle_proxy_token = os.getenv("KAGGLE_DATA_PROXY_TOKEN")
|
16 |
| -CONNECTION_BASE_URL = Connection.API_BASE_URL |
17 |
| - |
18 |
| - |
19 |
| -def monkeypatch_bq(bq_client, *args, **kwargs): |
20 |
| - data_proxy_project = os.getenv("KAGGLE_DATA_PROXY_PROJECT") |
21 |
| - bq_user_jwt = os.getenv("KAGGLE_BQ_USER_JWT") |
22 |
| - specified_project = kwargs.get('project') |
23 |
| - # Use Data Proxy if user has specified to use the Kaggle project, or if |
24 |
| - # there are no connected GCP accounts (to maintain backwards compatibility). |
25 |
| - if bq_user_jwt is None and specified_project and specified_project.lower() != 'kaggle': |
26 |
| - raise Exception("In order to query a private BigQuery project, please connect a GCP account. " |
27 |
| - "Otherwise specify 'kaggle' as the project to use Kaggle's public dataset BigQuery integration.") |
28 |
| - use_data_proxy = (specified_project and specified_project.lower() == 'kaggle') or bq_user_jwt is None |
29 |
| - if use_data_proxy: |
30 |
| - if data_proxy_project is None or kaggle_proxy_token is None: |
31 |
| - # We don't have the data proxy info so leave the bq client unmodified. |
32 |
| - return bq_client(*args, **kwargs) |
33 |
| - print("Using Kaggle's public dataset BigQuery integration.") |
34 |
| - Connection.API_BASE_URL = os.getenv("KAGGLE_DATA_PROXY_URL") |
35 |
| - Connection._EXTRA_HEADERS["X-KAGGLE-PROXY-DATA"] = kaggle_proxy_token |
36 |
| - anon_credentials = credentials.AnonymousCredentials() |
37 |
| - anon_credentials.refresh = lambda *args: None |
38 |
| - kwargs['project'] = data_proxy_project |
39 |
| - return bq_client( |
40 |
| - *args, |
41 |
| - credentials=anon_credentials, |
42 |
| - **kwargs) |
43 |
| - else: |
44 |
| - Connection.API_BASE_URL = CONNECTION_BASE_URL |
45 |
| - Connection._EXTRA_HEADERS.pop('X-KAGGLE-PROXY-DATA', None) |
46 |
| - if kwargs.get('credentials') is not None: |
47 |
| - # The user wants to use their own credentials scheme, don't try to interfere. |
| 4 | +bq_user_jwt = os.getenv("KAGGLE_BQ_USER_JWT") |
| 5 | +if (kaggle_proxy_token or bq_user_jwt): |
| 6 | + from google.auth import credentials |
| 7 | + from google.cloud import bigquery |
| 8 | + from google.cloud.bigquery._http import Connection |
| 9 | + from kaggle import kaggle_bq_client |
| 10 | + |
| 11 | + def monkeypatch_bq(bq_client, *args, **kwargs): |
| 12 | + data_proxy_project = os.getenv("KAGGLE_DATA_PROXY_PROJECT") |
| 13 | + specified_project = kwargs.get('project') |
| 14 | + specified_credentials = kwargs.get('credentials') |
| 15 | + if specified_project is None and specified_credentials is None: |
| 16 | + print("Using Kaggle's public dataset BigQuery integration.") |
| 17 | + return kaggle_bq_client(*args, **kwargs) |
| 18 | + else: |
48 | 19 | return bq_client(*args, **kwargs)
|
49 |
| - print("Using enabled BigQuery integration.") |
50 |
| - kwargs['credentials'] = KaggleKernelCredentials() |
51 |
| - return bq_client( |
52 |
| - *args, |
53 |
| - **kwargs) |
54 |
| - |
55 |
| -# Monkey patches BigQuery client creation to use proxy or user. |
56 |
| -bq_client = bigquery.Client |
57 |
| -bigquery.Client = lambda *args, **kwargs: monkeypatch_bq(bq_client, *args, **kwargs) |
58 | 20 |
|
| 21 | + # Monkey patches BigQuery client creation to use proxy or user-connected GCP account. |
| 22 | + # TODO: Remove monkeypatching altogether and move to using a Kaggle library for Data Proxy and |
| 23 | + # KaggleKernelCredentials with vanilla bq client for connected account. |
| 24 | + bq_client = bigquery.Client |
| 25 | + bigquery.Client = lambda *args, **kwargs: monkeypatch_bq( |
| 26 | + bq_client, *args, **kwargs) |
0 commit comments