Skip to content

Commit 3e4821d

Browse files
committed
Polars: operation in aggregation context
1 parent 63fb0fc commit 3e4821d

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

polars/groupby-polars.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,7 @@
194 194   question = "max v1 - min v2 by id3" # q7
195 195   gc.collect()
196 196   t_start = timeit.default_timer()
197     - ans = x.groupby("id3").agg([pl.max("v1"), pl.min("v2")]).select(["id3", (col("v1_max")-col("v2_min")).alias("range_v1_v2")]).collect()
    197 + ans = x.groupby("id3").agg([(pl.max("v1") - pl.min("v2")).alias("range_v1_v2")]).collect()
198 198   print(ans.shape, flush=True)
199 199   t = timeit.default_timer() - t_start
200 200   m = memory_usage()
@@ -205,7 +205,7 @@
205 205   del ans
206 206   gc.collect()
207 207   t_start = timeit.default_timer()
208     - ans = x.groupby("id3").agg([pl.max("v1"), pl.min("v2")]).select(["id3", (col("v1_max")-col("v2_min")).alias("range_v1_v2")]).collect()
    208 + ans = x.groupby("id3").agg([(pl.max("v1") - pl.min("v2")).alias("range_v1_v2")]).collect()
209 209   print(ans.shape, flush=True)
210 210   t = timeit.default_timer() - t_start
211 211   m = memory_usage()
@@ -220,7 +220,7 @@
220 220   question = "largest two v3 by id6" # q8
221 221   gc.collect()
222 222   t_start = timeit.default_timer()
223     - ans = x.drop_nulls("v3").sort("v3", reverse=True).groupby("id6").agg(col("v3").head(2).alias("largest2_v3")).explode("largest2_v3").collect()
    223 + ans = x.drop_nulls("v3").groupby("id6").agg(col("v3").sort(reverse=True).head(2).alias("largest2_v3")).explode("largest2_v3").collect()
224 224   print(ans.shape, flush=True)
225 225   t = timeit.default_timer() - t_start
226 226   m = memory_usage()
@@ -231,7 +231,7 @@
231 231   del ans
232 232   gc.collect()
233 233   t_start = timeit.default_timer()
234     - ans = x.drop_nulls("v3").sort("v3", reverse=True).groupby("id6").agg(col("v3").head(2).alias("largest2_v3")).explode("largest2_v3").collect()
    234 + ans = x.drop_nulls("v3").groupby("id6").agg(col("v3").sort(reverse=True).head(2).alias("largest2_v3")).explode("largest2_v3").collect()
235 235   print(ans.shape, flush=True)
236 236   t = timeit.default_timer() - t_start
237 237   m = memory_usage()

0 commit comments

Comments
 (0)