@@ -243,46 +243,37 @@ def peek(
243
243
plan , ordered = False , destination = destination_table , peek = n_rows
244
244
)
245
245
246
def head(
    self, array_value: bigframes.core.ArrayValue, n_rows: int
) -> executor.ExecuteResult:
    """Execute a plan that yields at most the first ``n_rows`` rows.

    Args:
        array_value: The value whose node is planned and executed.
        n_rows: Upper bound on the number of rows requested.

    Returns:
        The result of ``self._execute_plan`` (ordered), or of ``self.peek``
        when neither the executor nor the node demands an ordering.
    """
    root = array_value.node
    logical = self.logical_plan(root)

    # The plan is already known to be small enough: run it as-is.
    known_count = logical.row_count
    if known_count is not None and known_count <= n_rows:
        return self._execute_plan(logical, ordered=True)

    # Without a user-provided ordering any N rows will do, and it's faster.
    ordering_matters = self.strictly_ordered or root.explicitly_ordered
    if not ordering_matters:
        return self.peek(array_value, n_rows)

    if not tree_properties.can_fast_head(logical):
        # The whole query has to run. Cache it so the result is reusable and
        # the first N values are cheap to extract; this currently requires
        # clustering on offsets.
        self._cache_with_offsets(array_value)
        # Re-plan so the optimized plan reflects the fresh cache.
        logical = self.logical_plan(array_value.node)
        assert tree_properties.can_fast_head(logical)

    return self._execute_plan(generate_head_plan(logical, n_rows), ordered=True)
269
-
270
246
def cached(
    self, array_value: bigframes.core.ArrayValue, *, config: executor.CacheConfig
) -> None:
    """Write the block to a session table.

    Args:
        array_value: The value to cache.
        config: Caching options. ``config.if_cached`` decides whether an
            existing cache may be reused (``"reuse-any"``, ``"reuse-strict"``
            or ``"replace"``); ``config.optimize_for`` picks the caching
            strategy (``"auto"``, ``"head"`` or an
            ``executor.HierarchicalKey`` whose ``columns`` are used as
            cluster columns).

    Raises:
        NotImplementedError: If ``if_cached`` is ``"reuse-strict"`` and
            ``optimize_for`` is anything other than ``"head"``.
        ValueError: If ``if_cached`` or ``optimize_for`` holds an
            unrecognized value.
    """
    # First, see if we can reuse the existing cache
    # TODO(b/415105423): Provide feedback to user on whether new caching action was deemed necessary
    # TODO(b/415105218): Make cached a deferred action
    if config.if_cached == "reuse-any":
        if self._is_trivially_executable(array_value):
            return
    elif config.if_cached == "reuse-strict":
        # This path basically exists to make sure that repr in head mode is
        # optimized for subsequent repr operations.
        if config.optimize_for != "head":
            raise NotImplementedError(
                "if_cached='reuse-strict' currently only supported with optimize_for='head'"
            )
        if tree_properties.can_fast_head(array_value.node):
            return
    elif config.if_cached != "replace":
        raise ValueError(f"Unexpected 'if_cached' arg: {config.if_cached}")

    if config.optimize_for == "auto":
        self._cache_with_session_awareness(array_value)
    elif config.optimize_for == "head":
        self._cache_with_offsets(array_value)
    elif isinstance(config.optimize_for, executor.HierarchicalKey):
        self._cache_with_cluster_cols(
            array_value, cluster_cols=config.optimize_for.columns
        )
    else:
        # Raise explicitly rather than assert: asserts are stripped under
        # ``python -O``, which would turn a bad config into an unrelated
        # AttributeError on ``.columns``.
        raise ValueError(f"Unexpected 'optimize_for' arg: {config.optimize_for!r}")
286
277
287
278
# Helpers
288
279
def _run_execute_query (
@@ -571,7 +562,3 @@ def _sanitize(
571
562
)
572
563
for f in schema
573
564
)
574
-
575
-
576
def generate_head_plan(node: nodes.BigFrameNode, n: int):
    """Wrap ``node`` in a ``SliceNode`` with no start and a stop of ``n``."""
    head_slice = nodes.SliceNode(node, start=None, stop=n)
    return head_slice
0 commit comments