Skip to content

Commit d94d2a6

Browse files
authored
Uses KAGGLE_KERNEL_INTEGRATIONS to determine the best default credentials/client to use for BigQuery (Kaggle#518)
* Uses KAGGLE_KERNEL_INTEGRATIONS to determine the best default credentials/client to use for BigQuery.
* Adds more test cases for potential KAGGLE_KERNEL_INTEGRATIONS values.
1 parent 697c501 commit d94d2a6

File tree

3 files changed

+52
-4
lines changed

3 files changed

+52
-4
lines changed

patches/kaggle_gcp.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,24 @@
55
from google.cloud.bigquery._http import Connection
66
from kaggle_secrets import UserSecretsClient
77

8+
def get_integrations():
    """Build a KernelIntegrations from the KAGGLE_KERNEL_INTEGRATIONS env var.

    The variable is read as a colon-separated list of integration names
    (for example ``"GCS:BIGQUERY"``). Each name is lower-cased before it is
    registered, so the case used in the environment variable does not matter.

    Returns:
        KernelIntegrations: holds every non-empty name listed in the
        variable; empty when the variable is unset or contains no names.
    """
    kernel_integrations = KernelIntegrations()
    kernel_integrations_var = os.getenv("KAGGLE_KERNEL_INTEGRATIONS")
    if kernel_integrations_var is None:
        return kernel_integrations

    # Use an explicit loop: on Python 3 ``map`` returns a lazy iterator, so
    # calling it purely for its side effect never invoked add_integration().
    for name in kernel_integrations_var.split(':'):
        # ''.split(':') yields [''] and "a::b" yields an empty middle segment;
        # skip those so no empty-string integration gets registered.
        if name:
            kernel_integrations.add_integration(name.lower())
    return kernel_integrations
15+
16+
class KernelIntegrations:
    """Collection of integration names that have been registered.

    Names are stored exactly as passed to ``add_integration``; no case
    normalisation happens inside this class.
    """

    def __init__(self):
        # Maps integration name -> True; effectively used as a set of names.
        self.integrations = {}

    def add_integration(self, integration_name):
        """Register ``integration_name``; adding the same name again is a no-op."""
        self.integrations[integration_name] = True

    def has_bigquery(self):
        """Return True if the name ``'bigquery'`` has been registered."""
        # Membership on the dict itself already tests keys; .keys() is redundant.
        return 'bigquery' in self.integrations
25+
826

927
class KaggleKernelCredentials(credentials.Credentials):
1028
"""Custom Credentials used to authenticate using the Kernel's connected OAuth account.

patches/sitecustomize.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,29 @@
11
import os
2-
2+
from kaggle_gcp import get_integrations
33
kaggle_proxy_token = os.getenv("KAGGLE_DATA_PROXY_TOKEN")
4+
kernel_integrations_var = os.getenv("KAGGLE_KERNEL_INTEGRATIONS")
5+
46
bq_user_jwt = os.getenv("KAGGLE_BQ_USER_JWT")
57
if kaggle_proxy_token or bq_user_jwt:
68
from google.auth import credentials
79
from google.cloud import bigquery
810
from google.cloud.bigquery._http import Connection
911
# TODO: Update this to the correct kaggle.gcp path once we no longer inject modules
1012
# from the worker.
11-
from kaggle_gcp import PublicBigqueryClient
13+
from kaggle_gcp import PublicBigqueryClient, KaggleKernelCredentials
1214

1315
def monkeypatch_bq(bq_client, *args, **kwargs):
    """Create a BigQuery client, picking the public proxy or the real client.

    Args:
        bq_client: callable used to build the client when the public
            integration is not selected; invoked as ``bq_client(*args, **kwargs)``.
        *args: positional arguments forwarded to whichever client is built.
        **kwargs: keyword arguments forwarded likewise; ``project`` and
            ``credentials`` are inspected to choose the client.

    Returns:
        A ``PublicBigqueryClient`` when neither ``project`` nor ``credentials``
        was supplied and no bigquery integration is registered; otherwise the
        result of ``bq_client``, with ``credentials`` defaulted to a fresh
        ``KaggleKernelCredentials`` when the caller did not supply any.
    """
    # Read for parity with the original body; not referenced in the visible code.
    data_proxy_project = os.getenv("KAGGLE_DATA_PROXY_PROJECT")
    project_given = kwargs.get('project') is not None
    credentials_given = kwargs.get('credentials') is not None
    integrations = get_integrations()

    # Fall back to the public dataset proxy only when the caller expressed no
    # preference at all and the kernel has no bigquery integration attached.
    if not (project_given or credentials_given or integrations.has_bigquery()):
        print("Using Kaggle's public dataset BigQuery integration.")
        return PublicBigqueryClient(*args, **kwargs)

    if not credentials_given:
        kwargs['credentials'] = KaggleKernelCredentials()
    return bq_client(*args, **kwargs)
2228

2329
# Monkey patches BigQuery client creation to use proxy or user-connected GCP account.

tests/test_bigquery.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,31 @@ def test_project_with_connected_account(self):
6262
with env:
6363
client = bigquery.Client(project='ANOTHER_PROJECT', credentials=KaggleKernelCredentials())
6464
self._test_proxy(client, should_use_proxy=False)
65-
65+
66+
def test_project_with_empty_integrations(self):
    """Explicit project + credentials bypass the proxy when integrations is ''."""
    guard = EnvironmentVarGuard()
    settings = (
        ('KAGGLE_BQ_USER_JWT', 'foobar'),
        ('KAGGLE_KERNEL_INTEGRATIONS', ''),
    )
    for key, value in settings:
        guard.set(key, value)
    with guard:
        bq = bigquery.Client(
            project='ANOTHER_PROJECT',
            credentials=KaggleKernelCredentials(),
        )
        self._test_proxy(bq, should_use_proxy=False)
73+
74+
def test_project_with_connected_account_default_credentials(self):
    """Explicit project + credentials bypass the proxy with non-bigquery integrations."""
    # NOTE(review): despite the name, credentials are passed explicitly here and
    # the integrations listed are unrelated to bigquery; this name and that of
    # test_project_with_connected_account_unrelated_integrations look swapped -- confirm.
    guard = EnvironmentVarGuard()
    settings = (
        ('KAGGLE_BQ_USER_JWT', 'foobar'),
        ('KAGGLE_KERNEL_INTEGRATIONS', 'GCS:ANOTHER_ONE'),
    )
    for key, value in settings:
        guard.set(key, value)
    with guard:
        bq = bigquery.Client(
            project='ANOTHER_PROJECT',
            credentials=KaggleKernelCredentials(),
        )
        self._test_proxy(bq, should_use_proxy=False)
81+
82+
def test_project_with_connected_account_unrelated_integrations(self):
    """Project-only client bypasses the proxy when integrations is 'BIGQUERY'."""
    # NOTE(review): despite the name, the integration set here is BIGQUERY and
    # no credentials are passed, i.e. the default-credentials path; this name and
    # that of test_project_with_connected_account_default_credentials look swapped -- confirm.
    guard = EnvironmentVarGuard()
    settings = (
        ('KAGGLE_BQ_USER_JWT', 'foobar'),
        ('KAGGLE_KERNEL_INTEGRATIONS', 'BIGQUERY'),
    )
    for key, value in settings:
        guard.set(key, value)
    with guard:
        bq = bigquery.Client(project='ANOTHER_PROJECT')
        self._test_proxy(bq, should_use_proxy=False)
89+
6690
def test_simultaneous_clients(self):
6791
env = EnvironmentVarGuard()
6892
env.set('KAGGLE_BQ_USER_JWT', 'foobar')

0 commit comments

Comments
 (0)