|
| 1 | +// Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +// or more contributor license agreements. See the NOTICE file |
| 3 | +// distributed with this work for additional information |
| 4 | +// regarding copyright ownership. The ASF licenses this file |
| 5 | +// to you under the Apache License, Version 2.0 (the |
| 6 | +// "License"); you may not use this file except in compliance |
| 7 | +// with the License. You may obtain a copy of the License at |
| 8 | +// |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +// |
| 11 | +// Unless required by applicable law or agreed to in writing, |
| 12 | +// software distributed under the License is distributed on an |
| 13 | +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +// KIND, either express or implied. See the License for the |
| 15 | +// specific language governing permissions and limitations |
| 16 | +// under the License. |
| 17 | + |
| 18 | +use crate::{PhysicalExpr, PhysicalSortExpr}; |
| 19 | +use arrow::compute::kernels::partition::lexicographical_partition_ranges; |
| 20 | +use arrow::compute::kernels::sort::{SortColumn, SortOptions}; |
| 21 | +use arrow::record_batch::RecordBatch; |
| 22 | +use arrow::{array::ArrayRef, datatypes::Field}; |
| 23 | +use datafusion_common::{DataFusionError, Result}; |
| 24 | +use std::any::Any; |
| 25 | +use std::fmt::Debug; |
| 26 | +use std::ops::Range; |
| 27 | +use std::sync::Arc; |
| 28 | + |
| 29 | +/// A window expression that: |
| 30 | +/// * knows its resulting field |
| 31 | +pub trait WindowExpr: Send + Sync + Debug { |
| 32 | + /// Returns the window expression as [`Any`](std::any::Any) so that it can be |
| 33 | + /// downcast to a specific implementation. |
| 34 | + fn as_any(&self) -> &dyn Any; |
| 35 | + |
| 36 | + /// the field of the final result of this window function. |
| 37 | + fn field(&self) -> Result<Field>; |
| 38 | + |
| 39 | + /// Human readable name such as `"MIN(c2)"` or `"RANK()"`. The default |
| 40 | + /// implementation returns placeholder text. |
| 41 | + fn name(&self) -> &str { |
| 42 | + "WindowExpr: default name" |
| 43 | + } |
| 44 | + |
| 45 | + /// expressions that are passed to the WindowAccumulator. |
| 46 | + /// Functions which take a single input argument, such as `sum`, return a single [`datafusion_expr::expr::Expr`], |
| 47 | + /// others (e.g. `cov`) return many. |
| 48 | + fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>>; |
| 49 | + |
| 50 | + /// evaluate the window function arguments against the batch and return |
| 51 | + /// array ref, normally the resulting vec is a single element one. |
| 52 | + fn evaluate_args(&self, batch: &RecordBatch) -> Result<Vec<ArrayRef>> { |
| 53 | + self.expressions() |
| 54 | + .iter() |
| 55 | + .map(|e| e.evaluate(batch)) |
| 56 | + .map(|r| r.map(|v| v.into_array(batch.num_rows()))) |
| 57 | + .collect() |
| 58 | + } |
| 59 | + |
| 60 | + /// evaluate the window function values against the batch |
| 61 | + fn evaluate(&self, batch: &RecordBatch) -> Result<ArrayRef>; |
| 62 | + |
| 63 | + /// evaluate the partition points given the sort columns; if the sort columns are |
| 64 | + /// empty then the result will be a single element vec of the whole column rows. |
| 65 | + fn evaluate_partition_points( |
| 66 | + &self, |
| 67 | + num_rows: usize, |
| 68 | + partition_columns: &[SortColumn], |
| 69 | + ) -> Result<Vec<Range<usize>>> { |
| 70 | + if partition_columns.is_empty() { |
| 71 | + Ok(vec![Range { |
| 72 | + start: 0, |
| 73 | + end: num_rows, |
| 74 | + }]) |
| 75 | + } else { |
| 76 | + Ok(lexicographical_partition_ranges(partition_columns) |
| 77 | + .map_err(DataFusionError::ArrowError)? |
| 78 | + .collect::<Vec<_>>()) |
| 79 | + } |
| 80 | + } |
| 81 | + |
| 82 | + /// expressions that's from the window function's partition by clause, empty if absent |
| 83 | + fn partition_by(&self) -> &[Arc<dyn PhysicalExpr>]; |
| 84 | + |
| 85 | + /// expressions that's from the window function's order by clause, empty if absent |
| 86 | + fn order_by(&self) -> &[PhysicalSortExpr]; |
| 87 | + |
| 88 | + /// get partition columns that can be used for partitioning, empty if absent |
| 89 | + fn partition_columns(&self, batch: &RecordBatch) -> Result<Vec<SortColumn>> { |
| 90 | + self.partition_by() |
| 91 | + .iter() |
| 92 | + .map(|expr| { |
| 93 | + PhysicalSortExpr { |
| 94 | + expr: expr.clone(), |
| 95 | + options: SortOptions::default(), |
| 96 | + } |
| 97 | + .evaluate_to_sort_column(batch) |
| 98 | + }) |
| 99 | + .collect() |
| 100 | + } |
| 101 | + |
| 102 | + /// get sort columns that can be used for peer evaluation, empty if absent |
| 103 | + fn sort_columns(&self, batch: &RecordBatch) -> Result<Vec<SortColumn>> { |
| 104 | + let mut sort_columns = self.partition_columns(batch)?; |
| 105 | + let order_by_columns = self |
| 106 | + .order_by() |
| 107 | + .iter() |
| 108 | + .map(|e| e.evaluate_to_sort_column(batch)) |
| 109 | + .collect::<Result<Vec<SortColumn>>>()?; |
| 110 | + sort_columns.extend(order_by_columns); |
| 111 | + Ok(sort_columns) |
| 112 | + } |
| 113 | +} |
0 commit comments