Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions vortex-python/python/vortex/_lib/arrays.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ class Array:
def from_arrow(
obj: pa.Array[pa.Scalar[pa.DataType]] | pa.ChunkedArray[pa.Scalar[pa.DataType]] | pa.Table,
) -> Array: ...
# Build an Array from a Python ``range`` object.
# NOTE(review): the Rust binding for this method is declared with
# ``signature = (range, *, dtype = None)``, so the parameter name ``obj`` and the absence of a
# ``dtype`` keyword in this stub look out of sync with it -- confirm and align.
@staticmethod
def from_range(obj: range) -> Array: ...
def to_arrow_array(self) -> pa.Array[pa.Scalar[pa.DataType]]: ...
@property
def id(self) -> str: ...
Expand Down
30 changes: 29 additions & 1 deletion vortex-python/python/vortex/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,8 @@ def array(
| pyarrow.ChunkedArray[pyarrow.Scalar[Any]] # pyright: ignore[reportExplicitAny]
| pyarrow.Table
| list[Any] # pyright: ignore[reportExplicitAny]
| pandas.DataFrame,
| pandas.DataFrame
| range,
) -> Array:
"""The main entry point for creating Vortex arrays from other Python objects.

Expand Down Expand Up @@ -394,10 +395,37 @@ def array(
]
]

Initialize a Vortex array from a range:

>>> vortex.array(range(-3, 3)).to_arrow_array()
<pyarrow.lib.Int64Array object at ...>
[
-3,
-2,
-1,
0,
1,
2
]

With a step:

>>> vortex.array(range(-1_000_000, 10_000_000, 2_000_000)).to_arrow_array()
<pyarrow.lib.Int64Array object at ...>
[
-1000000,
1000000,
3000000,
5000000,
7000000,
9000000
]
"""

if isinstance(obj, list):
return Array.from_arrow(pyarrow.array(obj))
if isinstance(obj, range):
return Array.from_range(obj)
try:
import pandas

Expand Down
79 changes: 77 additions & 2 deletions vortex-python/src/arrays/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@ pub(crate) mod fastlanes;
pub(crate) mod from_arrow;
mod native;
pub(crate) mod py;
mod range_to_sequence;

use arrow_array::{Array as ArrowArray, ArrayRef as ArrowArrayRef};
use pyo3::exceptions::{PyTypeError, PyValueError};
use pyo3::prelude::*;
use pyo3::types::{PyDict, PyList};
use pyo3::types::{PyDict, PyList, PyRange, PyRangeMethods};
use vortex::arrays::ChunkedVTable;
use vortex::arrow::IntoArrowArray;
use vortex::compute::{Operator, compare, take};
use vortex::dtype::{DType, Nullability, PType, match_each_integer_ptype};
use vortex::error::VortexError;
use vortex::{Array, ArrayRef, ToCanonical};

Expand Down Expand Up @@ -181,7 +183,10 @@ impl PyArray {

/// Convert a PyArrow object into a Vortex array.
///
/// One of :class:`pyarrow.Array`, :class:`pyarrow.ChunkedArray`, or :class:`pyarrow.Table`.
/// Parameters
/// ----------
/// obj: pyarrow.Array | pyarrow.ChunkedArray | pyarrow.Table
/// The array to convert.
///
/// Returns
/// -------
Expand All @@ -191,6 +196,76 @@ impl PyArray {
from_arrow::from_arrow(&obj)
}

/// Convert a Python range into a Vortex array.
///
/// A non-empty range is encoded as a Sequence array, which uses O(1) bytes to represent an
/// array of any size.
///
/// Parameters
/// ----------
/// range: range
///     The range to convert.
/// dtype: DType, optional
///     Integer dtype of the resulting array. When omitted, an unsigned 64-bit integer is used if
///     ``start``, ``stop`` and ``step`` are all positive, and a signed 64-bit integer otherwise.
///
/// Returns
/// -------
/// :class:`~vortex.Array`
///
/// Raises
/// ------
/// ValueError
///     If ``dtype`` is not an integer dtype.
///
/// The conversion also fails when ``start`` or ``step`` cannot be represented in the requested
/// dtype.
///
/// Examples
/// --------
///
/// ```python
/// >>> array = vx.Array.from_range(range(0, 10))
/// >>> array
/// <vortex.SequenceArray object at ...>
/// >>> array.to_arrow_array()
/// <pyarrow.lib.Int64Array object at ...>
/// [
///   0,
///   1,
///   2,
///   3,
///   4,
///   5,
///   6,
///   7,
///   8,
///   9
/// ]
/// ```
#[staticmethod]
#[pyo3(signature = (range, *, dtype = None))]
fn from_range(range: Bound<PyAny>, dtype: Option<Bound<PyDType>>) -> PyResult<PyArrayRef> {
    let range = range.downcast::<PyRange>()?;
    let start = range.start()?;
    let stop = range.stop()?;
    let step = range.step()?;

    let (ptype, dtype) = if let Some(dtype) = dtype {
        // `dtype` is already typed as `PyDType`, so no further downcast is needed.
        let dtype = dtype.get().inner().clone();
        let DType::Primitive(ptype, ..) = &dtype else {
            return Err(PyValueError::new_err(
                "Cannot construct non-numeric array from a range.",
            ));
        };
        // The dispatch below only covers integer types (the element type must be
        // `TryFrom<isize>`), so reject other primitive dtypes with a regular Python error
        // instead of reaching it.
        if !ptype.is_int() {
            return Err(PyValueError::new_err(
                "Cannot construct non-integer array from a range.",
            ));
        }
        (*ptype, dtype)
    } else {
        // An unsigned type can only hold the range when the step is representable too: a
        // descending range such as `range(10, 3, -1)` has positive bounds but a negative step,
        // which must fall back to a signed type.
        let ptype = if start > 0 && stop > 0 && step > 0 {
            PType::U64
        } else {
            PType::I64
        };
        let dtype = DType::Primitive(ptype, Nullability::NonNullable);
        (ptype, dtype)
    };

    let array = match_each_integer_ptype!(ptype, |T| {
        range_to_sequence::sequence_array_from_range::<T>(start, stop, step, dtype)
    })?;

    Ok(PyVortex(array))
}

/// Convert this array to a PyArrow array.
///
/// .. seealso::
Expand Down
154 changes: 154 additions & 0 deletions vortex-python/src/arrays/range_to_sequence.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex::ArrayRef;
use vortex::arrays::PrimitiveArray;
use vortex::buffer::Buffer;
use vortex::dtype::{DType, NativePType, Nullability};
use vortex::encodings::sequence::SequenceArray;
use vortex::error::{VortexExpect, VortexResult, vortex_bail};
use vortex::scalar::PValue;
use vortex::validity::Validity;

/// Build the array described by `range(start, stop, step)` with element type `T`.
///
/// When [`range_len`] reports no length for the range, an empty primitive array is returned
/// whose validity follows the nullability of `dtype`. Otherwise the result is a
/// [`SequenceArray`] starting at `start`, advancing by `step`, with the computed length.
///
/// # Errors
///
/// Fails if `step` is zero, if `start` or `step` cannot be converted into `T`, or if
/// [`SequenceArray::typed_new`] rejects the arguments.
pub fn sequence_array_from_range<T: NativePType + TryFrom<isize> + Into<PValue>>(
    start: isize,
    stop: isize,
    step: isize,
    dtype: DType,
) -> VortexResult<ArrayRef> {
    if step == 0 {
        vortex_bail!("Step must not be zero");
    }

    let len = match range_len(start, stop, step) {
        Some(len) => len,
        None => {
            // Nothing to enumerate: hand back an empty primitive array of the right type.
            let validity = if matches!(dtype.nullability(), Nullability::Nullable) {
                Validity::AllValid
            } else {
                Validity::NonNullable
            };
            let empty = PrimitiveArray::new::<T>(Buffer::empty(), validity);
            return Ok(empty.to_array());
        }
    };

    let first = match T::try_from(start) {
        Ok(value) => value,
        Err(_) => {
            vortex_bail!(
                "Start, {}, does not fit in requested dtype: {}",
                start,
                dtype
            );
        }
    };

    let increment = match T::try_from(step) {
        Ok(value) => value,
        Err(_) => {
            vortex_bail!("Step, {}, does not fit in requested dtype: {}", step, dtype);
        }
    };

    let sequence = SequenceArray::typed_new::<T>(first, increment, dtype.nullability(), len)?;
    Ok(sequence.to_array())
}

/// Number of elements produced by Python's `range(start, stop, step)`.
///
/// Returns `None` when `stop` lies on the wrong side of `start` for the direction of `step`
/// (`start > stop` with a positive step, or `stop > start` with a negative step). A range with
/// `start == stop` yields `Some(0)`.
///
/// The length is `ceil(|stop - start| / |step|)`. It is computed on unsigned magnitudes so that
/// extreme bounds (for example `isize::MIN..isize::MAX`) or a step of `isize::MIN` cannot
/// overflow: the distance between two `isize` values always fits in a `usize`.
///
/// # Panics
///
/// Panics if `step` is zero.
fn range_len(start: isize, stop: isize, step: isize) -> Option<usize> {
    assert!(step != 0);

    let wrong_direction = if step > 0 {
        start > stop
    } else {
        stop > start
    };
    if wrong_direction {
        return None;
    }

    Some(start.abs_diff(stop).div_ceil(step.unsigned_abs()))
}

#[cfg(test)]
mod test {
    use vortex::IntoArray as _;
    use vortex::arrow::IntoArrowArray;
    use vortex::buffer::buffer;
    use vortex::dtype::{DType, Nullability, PType};

    use crate::arrays::range_to_sequence::{range_len, sequence_array_from_range};

    /// Builds the array for `range(start, stop, step)` with the given native type and ptype,
    /// then checks both its dtype and its Arrow conversion against the listed values.
    macro_rules! assert_range_yields {
        ($native:ty, $ptype:expr, ($start:expr, $stop:expr, $step:expr) => [$($value:expr),+ $(,)?]) => {{
            let dtype = DType::Primitive($ptype, Nullability::NonNullable);
            let arr = sequence_array_from_range::<$native>($start, $stop, $step, dtype.clone())
                .unwrap();
            assert_eq!(arr.dtype(), &dtype);
            assert_eq!(
                &arr.into_arrow_preferred().unwrap(),
                &buffer![$($value),+]
                    .into_array()
                    .into_arrow_preferred()
                    .unwrap()
            );
        }};
    }

    #[test]
    fn test_range_len() {
        // ((start, stop, step), expected length)
        let cases: [((isize, isize, isize), Option<usize>); 10] = [
            ((0, 10, 1), Some(10)),
            ((0, 10, 5), Some(2)),
            ((0, 10, 10), Some(1)),
            ((0, 10, 100), Some(1)),
            ((-5, -5, 1), Some(0)),
            ((-5, 5, 3), Some(4)),
            ((-7, -5, 1), Some(2)),
            ((3, -3, -1), Some(6)),
            ((10, 3, 1), None),
            ((0, 10, -1), None),
        ];

        for ((start, stop, step), expected) in cases {
            assert_eq!(
                range_len(start, stop, step),
                expected,
                "range({start}, {stop}, {step})"
            );
        }
    }

    #[test]
    fn test_sequence_array_from_len() {
        assert_range_yields!(u16, PType::U16, (0, 10, 1) => [0u16, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
        assert_range_yields!(i32, PType::I32, (0, 10, 5) => [0i32, 5]);
        assert_range_yields!(i8, PType::I8, (-5, 5, 3) => [-5i8, -2, 1, 4]);
        assert_range_yields!(i8, PType::I8, (3, -3, -1) => [3i8, 2, 1, 0, -1, -2]);

        // A negative step cannot be represented in an unsigned element type.
        let unsigned = DType::Primitive(PType::U32, Nullability::NonNullable);
        let outcome = sequence_array_from_range::<u32>(1_000_000, 10, -500_000, unsigned);
        assert!(
            outcome.is_err_and(|err| err.to_string().contains("does not fit in requested dtype"))
        );

        assert_range_yields!(
            i32,
            PType::I32,
            (1_000_000, 10, -500_000) => [1_000_000i32, 500_000]
        );
    }
}
47 changes: 47 additions & 0 deletions vortex-python/test/test_from_range.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright the Vortex contributors

import vortex as vx


def test_from_range_0_10_1():
    """A unit-step range starting at zero converts element for element."""
    source = range(0, 10)
    arr = vx.array(source)
    values = [arr.scalar_at(i).as_py() for i in range(len(arr))]
    assert values == list(source)


def test_from_range_0_10_5():
    """A step that divides the span evenly yields the same elements as the range."""
    source = range(0, 10, 5)
    arr = vx.array(source)
    values = [arr.scalar_at(i).as_py() for i in range(len(arr))]
    assert values == list(source)


def test_from_range_0_10_10():
    """A step equal to the span leaves only the start value."""
    arr = vx.array(range(0, 10, 10))
    values = [arr.scalar_at(i).as_py() for i in range(len(arr))]
    assert values == [0]


def test_from_range_0_10_100():
    """A step larger than the span leaves only the start value."""
    arr = vx.array(range(0, 10, 100))
    values = [arr.scalar_at(i).as_py() for i in range(len(arr))]
    assert values == [0]


def test_from_range_minus_5_5_1():
    """A unit-step range crossing zero converts element for element."""
    source = range(-5, 5)
    arr = vx.array(source)
    values = [arr.scalar_at(i).as_py() for i in range(len(arr))]
    assert values == list(source)


def test_from_range_minus_5_5_3():
    """A step that does not divide the span stops at the last value below ``stop``."""
    arr = vx.array(range(-5, 5, 3))
    values = [arr.scalar_at(i).as_py() for i in range(len(arr))]
    assert values == [-5, -2, 1, 4]


def test_from_range_minus_7_minus_5():
    """A range whose bounds are both negative converts element for element."""
    arr = vx.array(range(-7, -5))
    values = [arr.scalar_at(i).as_py() for i in range(len(arr))]
    assert values == [-7, -6]


def test_from_range_invalid():
    """Ranges whose step points away from ``stop`` convert to empty arrays."""
    for empty_range in (range(10, 3), range(0, 10, -1)):
        arr = vx.array(empty_range)
        values = [arr.scalar_at(i).as_py() for i in range(len(arr))]
        assert values == []
Loading