Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions vortex-array/src/arrays/expr/array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_dtype::DType;
use vortex_error::{VortexResult, vortex_ensure};

use crate::expr::Expression;
use crate::stats::ArrayStats;
use crate::{Array, ArrayRef};

/// A array that represents an expression to be evaluated over a child array.
///
/// `ExprArray` enables deferred evaluation of expressions by wrapping a child array
/// with an expression that operates on it. The expression is not evaluated until the
/// array is canonicalized/executed.
///
/// # Examples
///
/// ```ignore
/// // Create an expression that filters an integer array
/// let data = PrimitiveArray::from_iter([1, 2, 3, 4, 5]);
/// let expr = gt(root(), lit(3)); // $ > 3
/// let expr_array = ExprArray::new_infer_dtype(data.into_array(), expr)?;
///
/// // The expression is evaluated when canonicalized
/// let result = expr_array.to_canonical(); // Returns BoolArray([false, false, false, true, true])
/// ```
///
/// # Type Safety
///
/// The `dtype` field must match `expr.return_dtype(child.dtype())`. This invariant
/// is enforced by the safe constructors ([`try_new`](ExprArray::try_new) and
/// [`new_infer_dtype`](ExprArray::new_infer_dtype)) but can be bypassed
/// with [`unchecked_new`](ExprArray::unchecked_new) for performance-critical code.
#[derive(Clone, Debug)]
pub struct ExprArray {
/// The underlying array that the expression will operate on.
pub(super) child: ArrayRef,
/// The expression to evaluate over the child array.
pub(super) expr: Expression,
/// The data type of the result after evaluating the expression.
pub(super) dtype: DType,
/// Statistics about the resulting array (may be computed lazily).
pub(super) stats: ArrayStats,
}

impl ExprArray {
/// Creates a new ExprArray with the dtype validated to match the expression's return type.
pub fn try_new(child: ArrayRef, expr: Expression, dtype: DType) -> VortexResult<Self> {
let expected_dtype = expr.return_dtype(child.dtype())?;
vortex_ensure!(
dtype == expected_dtype,
"ExprArray dtype mismatch: expected {}, got {}",
expected_dtype,
dtype
);
Ok(unsafe { Self::unchecked_new(child, expr, dtype) })
}

/// Create a new ExprArray without validating that the dtype matches the expression's return type.
///
/// # Safety
///
/// The caller must ensure that `dtype` matches `expr.return_dtype(child.dtype())`.
/// Violating this invariant may lead to incorrect results or panics when the array is used.
pub unsafe fn unchecked_new(child: ArrayRef, expr: Expression, dtype: DType) -> Self {
Self {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I quite like the pattern we have started to use in vortex-vector where we have new that calls try_new().expect(), and new_unchecked (note not unchecked_new) has if cfg(debug_assertions) { try_new().expect() } else { Self { ... }}

child,
expr,
dtype,
// TODO(joe): Propagate or compute statistics from the child array and expression.
stats: ArrayStats::default(),
}
}

/// Creates a new ExprArray with the dtype inferred from the expression and child.
pub fn new_infer_dtype(child: ArrayRef, expr: Expression) -> VortexResult<Self> {
let dtype = expr.return_dtype(child.dtype())?;
Ok(unsafe { Self::unchecked_new(child, expr, dtype) })
}

pub fn child(&self) -> &ArrayRef {
&self.child
}

pub fn expr(&self) -> &Expression {
&self.expr
}
}
8 changes: 8 additions & 0 deletions vortex-array/src/arrays/expr/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

mod array;
pub use array::ExprArray;

mod vtable;
pub use vtable::{ExprEncoding, ExprVTable};
38 changes: 38 additions & 0 deletions vortex-array/src/arrays/expr/vtable/array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::hash::Hash;

use vortex_dtype::DType;

use crate::Precision;
use crate::arrays::expr::{ExprArray, ExprVTable};
use crate::hash::{ArrayEq, ArrayHash};
use crate::stats::StatsSetRef;
use crate::vtable::ArrayVTable;

impl ArrayVTable<ExprVTable> for ExprVTable {
fn len(array: &ExprArray) -> usize {
array.child.len()
}

fn dtype(array: &ExprArray) -> &DType {
&array.dtype
}

fn stats(array: &ExprArray) -> StatsSetRef<'_> {
array.stats.to_ref(array.as_ref())
}

fn array_hash<H: std::hash::Hasher>(array: &ExprArray, state: &mut H, precision: Precision) {
array.child.array_hash(state, precision);
array.dtype.hash(state);
array.expr.hash(state)
}

fn array_eq(array: &ExprArray, other: &ExprArray, precision: Precision) -> bool {
array.child.array_eq(&other.child, precision)
&& array.dtype == other.dtype
&& array.expr == other.expr
}
}
48 changes: 48 additions & 0 deletions vortex-array/src/arrays/expr/vtable/canonical.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_error::VortexExpect;

use crate::Canonical;
use crate::arrays::expr::{ExprArray, ExprVTable};
use crate::vtable::CanonicalVTable;

impl CanonicalVTable<ExprVTable> for ExprVTable {
fn canonicalize(array: &ExprArray) -> Canonical {
array
.expr
.evaluate(&array.child)
.vortex_expect("Failed to evaluate expression")
.to_canonical()
}
}

#[cfg(test)]
mod tests {
use vortex_buffer::buffer;
use vortex_dtype::Nullability::NonNullable;
use vortex_dtype::{DType, PType};

use crate::arrays::expr::ExprArray;
use crate::arrays::primitive::PrimitiveArray;
use crate::expr::binary::checked_add;
use crate::expr::literal::lit;
use crate::validity::Validity;
use crate::{Array, IntoArray, assert_arrays_eq};

#[test]
fn test_expr_array_canonicalize() {
let child = PrimitiveArray::new(buffer![1i32, 2, 3], Validity::NonNullable).into_array();

// This expression doesn't use the child, but demonstrates the ExprArray mechanics
let expr = checked_add(lit(10), lit(5));

let dtype = DType::Primitive(PType::I32, NonNullable);
let expr_array = ExprArray::try_new(child, expr, dtype).unwrap();

let actual = expr_array.to_canonical().into_array();

let expect = (0..3).map(|_| 15i32).collect::<PrimitiveArray>();
assert_arrays_eq!(expect, actual);
}
}
88 changes: 88 additions & 0 deletions vortex-array/src/arrays/expr/vtable/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

mod array;
mod canonical;
mod operations;
mod operator;
mod visitor;

use std::fmt::Debug;

use vortex_buffer::ByteBuffer;
use vortex_dtype::DType;
use vortex_error::{VortexResult, vortex_bail};

use crate::arrays::expr::ExprArray;
use crate::expr::Expression;
use crate::serde::ArrayChildren;
use crate::vtable::{NotSupported, VTable};
use crate::{EncodingId, EncodingRef, vtable};

vtable!(Expr);

#[derive(Clone, Debug)]
pub struct ExprEncoding;

impl VTable for ExprVTable {
type Array = ExprArray;
type Encoding = ExprEncoding;
type Metadata = ExprArrayMetadata;

type ArrayVTable = Self;
type CanonicalVTable = Self;
type OperationsVTable = Self;
type ValidityVTable = NotSupported;
type VisitorVTable = Self;
type ComputeVTable = NotSupported;
type EncodeVTable = NotSupported;
type OperatorVTable = Self;

fn id(_encoding: &Self::Encoding) -> EncodingId {
EncodingId::new_ref("vortex.expr")
}

fn encoding(_array: &Self::Array) -> EncodingRef {
EncodingRef::new_ref(ExprEncoding.as_ref())
}

fn metadata(array: &ExprArray) -> VortexResult<Self::Metadata> {
Ok(ExprArrayMetadata((array.expr.clone(), array.dtype.clone())))
}

fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
Ok(None)
}

fn deserialize(_bytes: &[u8]) -> VortexResult<Self::Metadata> {
vortex_bail!("unsupported")
}

fn build(
_encoding: &ExprEncoding,
dtype: &DType,
len: usize,
ExprArrayMetadata((expr, root_dtype)): &Self::Metadata,
buffers: &[ByteBuffer],
children: &dyn ArrayChildren,
) -> VortexResult<ExprArray> {
if !buffers.is_empty() {
vortex_bail!("Expected 0 buffers, got {}", buffers.len());
}

let Ok(child) = children.get(0, root_dtype, len) else {
vortex_bail!("Expected 1 child, got {}", children.len());
};

ExprArray::try_new(child, expr.clone(), dtype.clone())
}
}

pub struct ExprArrayMetadata((Expression, DType));

impl Debug for ExprArrayMetadata {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// Since this is used in display method we can omit the dtype.
self.0.0.fmt_sql(f)
}
}
37 changes: 37 additions & 0 deletions vortex-array/src/arrays/expr/vtable/operations.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::ops::Range;

use vortex_error::VortexExpect;
use vortex_scalar::Scalar;

use crate::arrays::ConstantArray;
use crate::arrays::expr::{ExprArray, ExprVTable};
use crate::stats::ArrayStats;
use crate::vtable::OperationsVTable;
use crate::{Array, ArrayRef, IntoArray};

impl OperationsVTable<ExprVTable> for ExprVTable {
fn slice(array: &ExprArray, range: Range<usize>) -> ArrayRef {
let child = array.child.slice(range);

ExprArray {
child,
expr: array.expr.clone(),
dtype: array.dtype.clone(),
stats: ArrayStats::default(),
}
.into_array()
}

fn scalar_at(array: &ExprArray, index: usize) -> Scalar {
// TODO(joe): this is unchecked
array
.expr
.evaluate(&ConstantArray::new(array.child.scalar_at(index), 1).into_array())
.vortex_expect("cannot fail")
.as_constant()
.vortex_expect("expr are scalar so cannot fail")
}
}
65 changes: 65 additions & 0 deletions vortex-array/src/arrays/expr/vtable/operator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_error::VortexResult;

use crate::ArrayRef;
use crate::arrays::expr::{ExprArray, ExprVTable};
use crate::expr::root;
use crate::expr::session::ExprSession;
use crate::expr::transform::ExprOptimizer;
use crate::vtable::OperatorVTable;

impl OperatorVTable<ExprVTable> for ExprVTable {
fn reduce(array: &ExprArray) -> VortexResult<Option<ArrayRef>> {
// Get the default expression session
let session = ExprSession::default();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe leave a louder fixme / todo that we have to inject this

let optimizer = ExprOptimizer::new(&session);

// Try to optimize the expression with type information
let optimized_expr =
optimizer.optimize_typed(array.expr().clone(), array.child().dtype())?;

if optimized_expr != *array.expr() {
// If the expression simplified to just root(), return the child directly
if optimized_expr == root() {
return Ok(Some(array.child().clone()));
}

let new_dtype = optimized_expr.return_dtype(array.child().dtype())?;
Ok(Some(
ExprArray::try_new(array.child().clone(), optimized_expr, new_dtype)?.into(),
))
} else {
Ok(None)
}
}
}

#[cfg(test)]
mod tests {

use vortex_dtype::Nullability;
use vortex_error::VortexExpect;

use super::*;
use crate::IntoArray;
use crate::arrays::{PrimitiveArray, PrimitiveVTable};
use crate::expr::{get_item, pack, root};

#[test]
fn test_expr_array_reduce_pack_unpack() -> VortexResult<()> {
let array = PrimitiveArray::from_iter([1i32, 2, 3, 4, 5]);

let expr = get_item("a", pack([("a", root())], Nullability::NonNullable));

let expr_array = ExprArray::new_infer_dtype(array.into_array(), expr)?;

// Call reduce - it should optimize pack(a: $).a to just $
let reduced = expr_array.reduce()?.vortex_expect("reduce failed");

assert!(reduced.is::<PrimitiveVTable>());

Ok(())
}
}
14 changes: 14 additions & 0 deletions vortex-array/src/arrays/expr/vtable/visitor.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use crate::arrays::expr::{ExprArray, ExprVTable};
use crate::vtable::VisitorVTable;
use crate::{ArrayBufferVisitor, ArrayChildVisitor};

impl VisitorVTable<ExprVTable> for ExprVTable {
fn visit_buffers(_array: &ExprArray, _visitor: &mut dyn ArrayBufferVisitor) {}

fn visit_children(array: &ExprArray, visitor: &mut dyn ArrayChildVisitor) {
visitor.visit_child("child", array.child.as_ref());
}
}
Loading
Loading