Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified vortex-array/goldenfiles/constant.metadata
Binary file not shown.
31 changes: 31 additions & 0 deletions vortex-array/src/arrays/constant/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@ use vortex_scalar::Scalar;

use crate::stats::ArrayStats;

/// Protobuf-encoded metadata for [`ConstantArray`].
///
/// When the serialized scalar value is small enough (see `CONSTANT_INLINE_THRESHOLD`),
/// it is inlined directly in the metadata to avoid a device-to-host copy on GPU.
#[derive(Clone, prost::Message)]
pub struct ConstantMetadata {
/// Protobuf-encoded bytes of the constant's scalar value, when inlined.
///
/// `None` means no value was inlined: either the encoding was larger than
/// `CONSTANT_INLINE_THRESHOLD`, or the metadata was deserialized from empty
/// bytes (an older writer that produced no metadata).
#[prost(optional, bytes, tag = "1")]
pub(super) scalar_value: Option<Vec<u8>>,
}

#[derive(Clone, Debug)]
pub struct ConstantArray {
pub(super) scalar: Scalar,
Expand Down Expand Up @@ -34,3 +44,24 @@ impl ConstantArray {
self.scalar
}
}

#[cfg(test)]
mod tests {
    use vortex_scalar::ScalarValue;

    use super::ConstantMetadata;
    use crate::ProstMetadata;
    use crate::test_harness::check_metadata;

    /// Runs `check_metadata` for `"constant.metadata"` on metadata that inlines the
    /// protobuf encoding of `i32::MAX`.
    #[cfg_attr(miri, ignore)]
    #[test]
    fn test_constant_metadata() {
        let encoded: Vec<u8> = ScalarValue::from(i32::MAX).to_protobytes();
        let metadata = ConstantMetadata {
            scalar_value: Some(encoded),
        };
        check_metadata("constant.metadata", ProstMetadata(metadata));
    }
}
1 change: 1 addition & 0 deletions vortex-array/src/arrays/constant/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub use arbitrary::ArbitraryConstantArray;

mod array;
pub use array::ConstantArray;
pub(crate) use array::ConstantMetadata;
pub(crate) use vtable::canonical::constant_canonicalize;

pub(crate) mod compute;
Expand Down
47 changes: 33 additions & 14 deletions vortex-array/src/arrays/constant/vtable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,13 @@ use vortex_scalar::ScalarValue;
use vortex_session::VortexSession;

use crate::ArrayRef;
use crate::EmptyMetadata;
use crate::DeserializeMetadata;
use crate::ExecutionCtx;
use crate::IntoArray;
use crate::ProstMetadata;
use crate::SerializeMetadata;
use crate::arrays::ConstantArray;
use crate::arrays::constant::ConstantMetadata;
use crate::arrays::constant::compute::rules::PARENT_RULES;
use crate::arrays::constant::vtable::canonical::constant_canonicalize;
use crate::buffer::BufferHandle;
Expand All @@ -39,10 +42,14 @@ impl ConstantVTable {
pub const ID: ArrayId = ArrayId::new_ref("vortex.constant");
}

/// Maximum size (in bytes) of a protobuf-encoded scalar value that will be inlined
/// into the array metadata.
///
/// The bound is inclusive: an encoding of exactly this many bytes is still inlined.
/// Values larger than this are stored only in the buffer.
const CONSTANT_INLINE_THRESHOLD: usize = 1024;

impl VTable for ConstantVTable {
type Array = ConstantArray;

type Metadata = EmptyMetadata;
type Metadata = ProstMetadata<ConstantMetadata>;

type ArrayVTable = Self;
type OperationsVTable = Self;
Expand All @@ -53,35 +60,47 @@ impl VTable for ConstantVTable {
Self::ID
}

fn metadata(_array: &ConstantArray) -> VortexResult<Self::Metadata> {
Ok(EmptyMetadata)
/// Produces the metadata for `array`, inlining its scalar value when the encoding is small.
///
/// The scalar value is protobuf-encoded; the bytes are placed in `scalar_value` only when
/// their length does not exceed `CONSTANT_INLINE_THRESHOLD`. Otherwise `scalar_value` is
/// `None`.
fn metadata(array: &ConstantArray) -> VortexResult<Self::Metadata> {
    let encoded: Vec<u8> = array.scalar().value().to_protobytes();
    let scalar_value = if encoded.len() <= CONSTANT_INLINE_THRESHOLD {
        Some(encoded)
    } else {
        None
    };
    Ok(ProstMetadata(ConstantMetadata { scalar_value }))
}

fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
Ok(Some(vec![]))
/// Serializes `metadata` into bytes by calling its `serialize` method.
///
/// Always returns `Some`, including when no scalar value was inlined.
fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
    let encoded = metadata.serialize();
    Ok(Some(encoded))
}

/// Decodes the constant array's metadata from its serialized `bytes`.
///
/// Empty input yields metadata with `scalar_value: None`. Any other input is decoded
/// through the `DeserializeMetadata` implementation of `Self::Metadata`, and a decode
/// failure is propagated to the caller. `_dtype`, `_len` and `_session` are unused.
///
/// NOTE(review): this span comes from a diff view, so the pre-change lines
/// (`_bytes: &[u8],` and `Ok(EmptyMetadata)`) appear next to their replacements.
fn deserialize(
_bytes: &[u8],
bytes: &[u8],
_dtype: &DType,
_len: usize,
_session: &VortexSession,
) -> VortexResult<Self::Metadata> {
Ok(EmptyMetadata)
// Empty bytes indicate an old writer that didn't produce metadata.
if bytes.is_empty() {
return Ok(ProstMetadata(ConstantMetadata { scalar_value: None }));
}
let metadata = <Self::Metadata as DeserializeMetadata>::deserialize(bytes)?;
Ok(ProstMetadata(metadata))
}

/// Builds a [`ConstantArray`] of length `len` whose scalar has the given `dtype`.
///
/// The scalar value is decoded from `metadata.scalar_value` when it is present.
/// Otherwise exactly one buffer is required; it is passed through
/// `try_to_host_sync()` and its contents are decoded as the protobuf-encoded value.
///
/// # Errors
///
/// Fails if no value is inlined and `buffers.len() != 1`, if `try_to_host_sync()`
/// fails, or if the protobuf bytes cannot be decoded into a `ScalarValue`.
///
/// NOTE(review): this span comes from a diff view. The pre-change lines
/// (`_metadata`, the unconditional buffer check and buffer read) and the review
/// thread are interleaved with the post-change code described above.
fn build(
dtype: &DType,
len: usize,
_metadata: &Self::Metadata,
metadata: &Self::Metadata,
buffers: &[BufferHandle],
_children: &dyn ArrayChildren,
) -> VortexResult<ConstantArray> {
if buffers.len() != 1 {
vortex_bail!("Expected 1 buffer, got {}", buffers.len());
}
let buffer = buffers[0].clone().try_to_host_sync()?;
let sv = ScalarValue::from_protobytes(&buffer)?;
// Prefer reading the scalar from inlined metadata to avoid device-to-host copies.
let sv = if let Some(ref proto_bytes) = metadata.scalar_value {
ScalarValue::from_protobytes(proto_bytes)?
} else {
if buffers.len() != 1 {
vortex_bail!("Expected 1 buffer, got {}", buffers.len());
}
let buffer = buffers[0].clone().try_to_host_sync()?;
ScalarValue::from_protobytes(&buffer)?
Comment on lines +94 to +102
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so we duplicate them?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for small constants yea, to keep forward compat

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also if not and we need the scalar in the device we would need to issue a h2d copy anyway, these values are small enough to just duplicate I think

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this ever actually get triggered?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it does for clickbench or tpch

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but we do have constant arrays and they can't be read to the gpu without an extra host roundtrip, so constantarray::build is definitely triggered on a clickbench scan

Copy link
Contributor

@joseph-isaacs joseph-isaacs Feb 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we find out what scalar this is

};
let scalar = Scalar::new(dtype.clone(), sv);
Ok(ConstantArray::new(scalar, len))
}
Expand Down
Loading