|
19 | 19 |
|
20 | 20 | use crate::error::{DataFusionError, Result}; |
21 | 21 | use crate::physical_plan::{ |
22 | | - aggregates, window_functions::WindowFunction, AggregateExpr, Distribution, |
23 | | - ExecutionPlan, Partitioning, PhysicalExpr, SendableRecordBatchStream, WindowExpr, |
| 22 | + aggregates, expressions::Column, window_functions::WindowFunction, AggregateExpr, |
| 23 | + Distribution, ExecutionPlan, Partitioning, PhysicalExpr, RecordBatchStream, |
| 24 | + SendableRecordBatchStream, WindowExpr, |
| 25 | +}; |
| 26 | +use arrow::{ |
| 27 | + array::{Array, UInt32Builder}, |
| 28 | + datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}, |
| 29 | + error::{ArrowError, Result as ArrowResult}, |
| 30 | + record_batch::RecordBatch, |
24 | 31 | }; |
25 | | -use arrow::datatypes::{Field, Schema, SchemaRef}; |
26 | 32 | use async_trait::async_trait; |
| 33 | +use futures::stream::Stream; |
| 34 | +use futures::stream::StreamExt; |
| 35 | +use pin_project_lite::pin_project; |
27 | 36 | use std::any::Any; |
| 37 | +use std::pin::Pin; |
28 | 38 | use std::sync::Arc; |
| 39 | +use std::task::{Context, Poll}; |
29 | 40 |
|
30 | 41 | /// Window execution plan |
31 | 42 | #[derive(Debug)] |
@@ -195,43 +206,89 @@ impl ExecutionPlan for WindowAggExec { |
195 | 206 |
|
196 | 207 | let input = self.input.execute(partition).await?; |
197 | 208 |
|
198 | | - |
199 | | - |
200 | | - |
201 | 209 | Err(DataFusionError::NotImplemented( |
202 | 210 | "WindowAggExec::execute".to_owned(), |
203 | 211 | )) |
204 | 212 | } |
205 | 213 | } |
206 | 214 |
|
207 | | -// struct WindowAggStream { |
208 | | -// scheme: SchemaRef, |
209 | | -// window_expr: Vec<Arc<dyn WindowExpr>>, |
210 | | -// input: SendableRecordBatchStream, |
211 | | -// } |
212 | | - |
213 | | -// impl Stream for WindowAggStream { |
214 | | -// type Item = ArrowResult<RecordBatch>; |
215 | | - |
216 | | -// fn poll_next( |
217 | | -// mut self: Pin<&mut Self>, |
218 | | -// cx: &mut Context<'_>, |
219 | | -// ) -> Poll<Option<Self::Item>> { |
220 | | -// self.input.poll_next_unpin(cx).map(|x| match x { |
221 | | -// Some(Ok(batch)) => Some(batch_project(&batch, &self.expr, &self.schema)), |
222 | | -// other => other, |
223 | | -// }) |
224 | | -// } |
225 | | - |
226 | | -// fn size_hint(&self) -> (usize, Option<usize>) { |
227 | | -// // same number of record batches |
228 | | -// self.input.size_hint() |
229 | | -// } |
230 | | -// } |
231 | | - |
232 | | -// impl RecordBatchStream for WindowAggStream { |
233 | | -// /// Get the schema |
234 | | -// fn schema(&self) -> SchemaRef { |
235 | | -// self.schema.clone() |
236 | | -// } |
237 | | -// } |
| 215 | +pin_project! { |
| 216 | + /// stream for window aggregation plan |
| 217 | + pub struct WindowAggStream { |
| 218 | + #[pin] |
| 219 | + output: futures::channel::oneshot::Receiver<ArrowResult<Option<RecordBatch>>>, |
| 220 | + finished: bool, |
| 221 | + schema: SchemaRef, |
| 222 | + } |
| 223 | +} |
| 224 | + |
| 225 | +async fn compute_window_aggregate( |
| 226 | + schema: SchemaRef, |
| 227 | + window_expr: Vec<Arc<dyn WindowExpr>>, |
| 228 | + mut input: SendableRecordBatchStream, |
| 229 | +) -> ArrowResult<Option<RecordBatch>> { |
| 230 | + unimplemented!("not implemented") |
| 231 | +} |
| 232 | + |
| 233 | +impl WindowAggStream { |
| 234 | + fn new( |
| 235 | + schema: SchemaRef, |
| 236 | + input: SendableRecordBatchStream, |
| 237 | + window_expr: Vec<Arc<dyn WindowExpr>>, |
| 238 | + ) -> Self { |
| 239 | + let (tx, rx) = futures::channel::oneshot::channel(); |
| 240 | + let schema_clone = schema.clone(); |
| 241 | + tokio::spawn(async move { |
| 242 | + let result = compute_window_aggregate(schema_clone, window_expr, input).await; |
| 243 | + tx.send(result) |
| 244 | + }); |
| 245 | + |
| 246 | + Self { |
| 247 | + output: rx, |
| 248 | + finished: false, |
| 249 | + schema, |
| 250 | + } |
| 251 | + } |
| 252 | +} |
| 253 | + |
| 254 | +impl Stream for WindowAggStream { |
| 255 | + type Item = ArrowResult<RecordBatch>; |
| 256 | + |
| 257 | + fn poll_next( |
| 258 | + mut self: Pin<&mut Self>, |
| 259 | + cx: &mut Context<'_>, |
| 260 | + ) -> Poll<Option<Self::Item>> { |
| 261 | + if self.finished { |
| 262 | + return Poll::Ready(None); |
| 263 | + } |
| 264 | + |
| 265 | + // is the output ready? |
| 266 | + let this = self.project(); |
| 267 | + let output_poll = this.output.poll(cx); |
| 268 | + |
| 269 | + match output_poll { |
| 270 | + Poll::Ready(result) => { |
| 271 | + *this.finished = true; |
| 272 | + // check for error in receiving channel and unwrap actual result |
| 273 | + let result = match result { |
| 274 | + Err(e) => Err(ArrowError::ExternalError(Box::new(e))), // error receiving |
| 275 | + Ok(result) => result, |
| 276 | + }; |
| 277 | + Poll::Ready(Some(result)) |
| 278 | + } |
| 279 | + Poll::Pending => Poll::Pending, |
| 280 | + } |
| 281 | + } |
| 282 | + |
| 283 | + fn size_hint(&self) -> (usize, Option<usize>) { |
| 284 | + // same number of record batches |
| 285 | + self.input.size_hint() |
| 286 | + } |
| 287 | +} |
| 288 | + |
| 289 | +impl RecordBatchStream for WindowAggStream { |
| 290 | + /// Get the schema |
| 291 | + fn schema(&self) -> SchemaRef { |
| 292 | + self.schema.clone() |
| 293 | + } |
| 294 | +} |
0 commit comments