|
194 | 194 | question = "max v1 - min v2 by id3" # q7
|
195 | 195 | gc.collect()
|
196 | 196 | t_start = timeit.default_timer()
|
197 |
| -ans = x.groupby("id3").agg([pl.max("v1"), pl.min("v2")]).select(["id3", (col("v1_max")-col("v2_min")).alias("range_v1_v2")]).collect() |
| 197 | +ans = x.groupby("id3").agg([(pl.max("v1") - pl.min("v2")).alias("range_v1_v2")]).collect() |
198 | 198 | print(ans.shape, flush=True)
|
199 | 199 | t = timeit.default_timer() - t_start
|
200 | 200 | m = memory_usage()
|
|
205 | 205 | del ans
|
206 | 206 | gc.collect()
|
207 | 207 | t_start = timeit.default_timer()
|
208 |
| -ans = x.groupby("id3").agg([pl.max("v1"), pl.min("v2")]).select(["id3", (col("v1_max")-col("v2_min")).alias("range_v1_v2")]).collect() |
| 208 | +ans = x.groupby("id3").agg([(pl.max("v1") - pl.min("v2")).alias("range_v1_v2")]).collect() |
209 | 209 | print(ans.shape, flush=True)
|
210 | 210 | t = timeit.default_timer() - t_start
|
211 | 211 | m = memory_usage()
|
|
220 | 220 | question = "largest two v3 by id6" # q8
|
221 | 221 | gc.collect()
|
222 | 222 | t_start = timeit.default_timer()
|
223 |
| -ans = x.drop_nulls("v3").sort("v3", reverse=True).groupby("id6").agg(col("v3").head(2).alias("largest2_v3")).explode("largest2_v3").collect() |
| 223 | +ans = x.drop_nulls("v3").groupby("id6").agg(col("v3").sort(reverse=True).head(2).alias("largest2_v3")).explode("largest2_v3").collect() |
224 | 224 | print(ans.shape, flush=True)
|
225 | 225 | t = timeit.default_timer() - t_start
|
226 | 226 | m = memory_usage()
|
|
231 | 231 | del ans
|
232 | 232 | gc.collect()
|
233 | 233 | t_start = timeit.default_timer()
|
234 |
| -ans = x.drop_nulls("v3").sort("v3", reverse=True).groupby("id6").agg(col("v3").head(2).alias("largest2_v3")).explode("largest2_v3").collect() |
| 234 | +ans = x.drop_nulls("v3").groupby("id6").agg(col("v3").sort(reverse=True).head(2).alias("largest2_v3")).explode("largest2_v3").collect() |
235 | 235 | print(ans.shape, flush=True)
|
236 | 236 | t = timeit.default_timer() - t_start
|
237 | 237 | m = memory_usage()
|
|
0 commit comments