@@ -45,7 +45,11 @@ def ai_forecast(
         result_sql = self._sql_generator.ai_forecast(
             source_sql=input_data.sql, options=options
         )
-        return self._session.read_gbq(result_sql)
+
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(result_sql, allow_large_results=True)


 class BqmlModel(BaseBqml):
@@ -95,7 +99,17 @@ def _apply_ml_tvf(
         )

         result_sql = apply_sql_tvf(input_sql)
-        df = self._session.read_gbq(result_sql, index_col=index_col_ids)
+        df = self._session.read_gbq_query(
+            result_sql,
+            index_col=index_col_ids,
+            # Many ML methods use nested JSON, which isn't yet compatible with
+            # joining local results. Also, there is a chance that the results
+            # are greater than 10 GB.
+            # TODO(b/395912450): Once the limitations with local data are
+            # resolved, consider setting allow_large_results only when expected
+            # data size is large.
+            allow_large_results=True,
+        )
         if df._has_index:
             df.index.names = index_labels
         # Restore column labels
@@ -159,7 +173,10 @@ def explain_predict(
     def global_explain(self, options: Mapping[str, bool]) -> bpd.DataFrame:
         sql = self._sql_generator.ml_global_explain(struct_options=options)
         return (
-            self._session.read_gbq(sql)
+            # TODO(b/395912450): Once the limitations with local data are
+            # resolved, consider setting allow_large_results only when expected
+            # data size is large.
+            self._session.read_gbq_query(sql, allow_large_results=True)
             .sort_values(by="attribution", ascending=False)
             .set_index("feature")
         )
@@ -234,26 +251,49 @@ def forecast(self, options: Mapping[str, int | float]) -> bpd.DataFrame:
         sql = self._sql_generator.ml_forecast(struct_options=options)
         timestamp_col_name = "forecast_timestamp"
         index_cols = [timestamp_col_name]
-        first_col_name = self._session.read_gbq(sql).columns.values[0]
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        first_col_name = self._session.read_gbq_query(
+            sql, allow_large_results=True
+        ).columns.values[0]
         if timestamp_col_name != first_col_name:
             index_cols.append(first_col_name)
-        return self._session.read_gbq(sql, index_col=index_cols).reset_index()
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(
+            sql, index_col=index_cols, allow_large_results=True
+        ).reset_index()

     def explain_forecast(self, options: Mapping[str, int | float]) -> bpd.DataFrame:
         sql = self._sql_generator.ml_explain_forecast(struct_options=options)
         timestamp_col_name = "time_series_timestamp"
         index_cols = [timestamp_col_name]
-        first_col_name = self._session.read_gbq(sql).columns.values[0]
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        first_col_name = self._session.read_gbq_query(
+            sql, allow_large_results=True
+        ).columns.values[0]
         if timestamp_col_name != first_col_name:
             index_cols.append(first_col_name)
-        return self._session.read_gbq(sql, index_col=index_cols).reset_index()
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(
+            sql, index_col=index_cols, allow_large_results=True
+        ).reset_index()

     def evaluate(self, input_data: Optional[bpd.DataFrame] = None):
         sql = self._sql_generator.ml_evaluate(
             input_data.sql if (input_data is not None) else None
         )

-        return self._session.read_gbq(sql)
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(sql, allow_large_results=True)

     def llm_evaluate(
         self,
@@ -262,42 +302,62 @@ def llm_evaluate(
     ):
         sql = self._sql_generator.ml_llm_evaluate(input_data.sql, task_type)

-        return self._session.read_gbq(sql)
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(sql, allow_large_results=True)

     def arima_evaluate(self, show_all_candidate_models: bool = False):
         sql = self._sql_generator.ml_arima_evaluate(show_all_candidate_models)

-        return self._session.read_gbq(sql)
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(sql, allow_large_results=True)

     def arima_coefficients(self) -> bpd.DataFrame:
         sql = self._sql_generator.ml_arima_coefficients()

-        return self._session.read_gbq(sql)
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(sql, allow_large_results=True)

     def centroids(self) -> bpd.DataFrame:
         assert self._model.model_type == "KMEANS"

         sql = self._sql_generator.ml_centroids()

-        return self._session.read_gbq(
-            sql, index_col=["centroid_id", "feature"]
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(
+            sql, index_col=["centroid_id", "feature"], allow_large_results=True
         ).reset_index()

     def principal_components(self) -> bpd.DataFrame:
         assert self._model.model_type == "PCA"

         sql = self._sql_generator.ml_principal_components()

-        return self._session.read_gbq(
-            sql, index_col=["principal_component_id", "feature"]
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(
+            sql,
+            index_col=["principal_component_id", "feature"],
+            allow_large_results=True,
         ).reset_index()

     def principal_component_info(self) -> bpd.DataFrame:
         assert self._model.model_type == "PCA"

         sql = self._sql_generator.ml_principal_component_info()

-        return self._session.read_gbq(sql)
+        # TODO(b/395912450): Once the limitations with local data are
+        # resolved, consider setting allow_large_results only when expected
+        # data size is large.
+        return self._session.read_gbq_query(sql, allow_large_results=True)

     def copy(self, new_model_name: str, replace: bool = False) -> BqmlModel:
         job_config = self._session._prepare_copy_job_config()
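
Taken together, the hunks above replace every `Session.read_gbq` call in this module with `Session.read_gbq_query` and pass `allow_large_results=True`, so BQML output that contains nested JSON or may exceed 10 GB is materialized through a destination table rather than fetched directly. A minimal usage sketch of that pattern; the project, dataset, and model names are placeholders, not from this commit:

    import bigframes.pandas as bpd

    session = bpd.get_global_session()

    # Illustrative BQML query; ML.EVALUATE output can contain nested fields
    # and, per the comments in the diff, may be larger than 10 GB for some
    # models.
    sql = "SELECT * FROM ML.EVALUATE(MODEL `my-project.my_dataset.my_model`)"

    # allow_large_results=True routes the result through a destination table,
    # the same pattern every call site in this diff adopts.
    df = session.read_gbq_query(sql, allow_large_results=True)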