Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arrow-array/src/builder/generic_bytes_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ mod tests {
write!(builder, "buz").unwrap();
builder.append_value("");
let a = builder.finish();
let r: Vec<_> = a.iter().map(|x| x.unwrap()).collect();
let r: Vec<_> = a.iter().flatten().collect();
assert_eq!(r, &["foo", "bar\n", "fizbuz"])
}
}
12 changes: 7 additions & 5 deletions arrow-array/src/builder/generic_bytes_view_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,8 @@ pub fn make_view(data: &[u8], block_id: u32, offset: u32) -> u128 {

#[cfg(test)]
mod tests {
use core::str;

use super::*;
use crate::Array;

Expand Down Expand Up @@ -642,7 +644,7 @@ mod tests {
let array = v.finish_cloned();
array.to_data().validate_full().unwrap();
assert_eq!(array.data_buffers().len(), 5);
let actual: Vec<_> = array.iter().map(Option::unwrap).collect();
let actual: Vec<_> = array.iter().flatten().collect();
assert_eq!(
actual,
&[
Expand Down Expand Up @@ -692,13 +694,13 @@ mod tests {
let mut exp_builder = StringViewBuilder::new();
let mut fixed_builder = StringViewBuilder::new().with_fixed_block_size(STARTING_BLOCK_SIZE);

let long_string = String::from_utf8(vec![b'a'; STARTING_BLOCK_SIZE as usize]).unwrap();
let long_string = str::from_utf8(&[b'a'; STARTING_BLOCK_SIZE as usize]).unwrap();

for i in 0..9 {
// 8k, 16k, 32k, 64k, 128k, 256k, 512k, 1M, 2M
for _ in 0..(2_u32.pow(i)) {
exp_builder.append_value(&long_string);
fixed_builder.append_value(&long_string);
exp_builder.append_value(long_string);
fixed_builder.append_value(long_string);
}
exp_builder.flush_in_progress();
fixed_builder.flush_in_progress();
Expand All @@ -721,7 +723,7 @@ mod tests {
}

// Add one more value, and the buffer stop growing.
exp_builder.append_value(&long_string);
exp_builder.append_value(long_string);
exp_builder.flush_in_progress();
assert_eq!(
exp_builder.completed.last().unwrap().capacity(),
Expand Down
2 changes: 1 addition & 1 deletion arrow-cast/src/cast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9392,7 +9392,7 @@ mod tests {
Some(vec![Some(0), None, Some(2)]),
]);
let a = cast_with_options(&array, &DataType::Utf8, &options).unwrap();
let r: Vec<_> = a.as_string::<i32>().iter().map(|x| x.unwrap()).collect();
let r: Vec<_> = a.as_string::<i32>().iter().flatten().collect();
assert_eq!(r, &["[0, 1, 2]", "[0, null, 2]"]);
}
#[test]
Expand Down
5 changes: 3 additions & 2 deletions arrow-csv/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ mod tests {
};
use arrow_array::types::*;
use arrow_buffer::i256;
use core::str;
use std::io::{Cursor, Read, Seek};
use std::sync::Arc;

Expand Down Expand Up @@ -508,7 +509,7 @@ Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,cupcakes
consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378,06:51:20,cupcakes
sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
"#;
assert_eq!(expected.to_string(), String::from_utf8(buffer).unwrap());
assert_eq!(expected, str::from_utf8(&buffer).unwrap());
}

#[test]
Expand Down Expand Up @@ -558,7 +559,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
,
0.290472,0.290472
"#;
assert_eq!(expected.to_string(), String::from_utf8(buffer).unwrap());
assert_eq!(expected, str::from_utf8(&buffer).unwrap());
}

#[test]
Expand Down
1 change: 1 addition & 0 deletions arrow-data/src/byte_view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ pub struct ByteView {

impl ByteView {
#[inline(always)]
/// Convert `ByteView` to `u128` by concatenating the fields
pub fn as_u128(self) -> u128 {
(self.length as u128)
| ((self.prefix as u128) << 32)
Expand Down
41 changes: 31 additions & 10 deletions arrow-data/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ pub struct ArrayData {
nulls: Option<NullBuffer>,
}

/// A thread-safe, shared reference to the Arrow array data.
pub type ArrayDataRef = Arc<ArrayData>;

impl ArrayData {
Expand Down Expand Up @@ -1747,7 +1748,12 @@ pub enum BufferSpec {
/// for array slicing and interoperability with `Vec`, which cannot be over-aligned.
///
/// Note that these alignment requirements will vary between architectures
FixedWidth { byte_width: usize, alignment: usize },
FixedWidth {
/// The width of each element in bytes
byte_width: usize,
/// The alignment required by Rust for an array of the corresponding primitive
alignment: usize,
},
/// Variable width, such as string data for utf8 data
VariableWidth,
/// Buffer holds a bitmap.
Expand Down Expand Up @@ -1783,6 +1789,7 @@ pub struct ArrayDataBuilder {

impl ArrayDataBuilder {
#[inline]
/// Creates a new array data builder
pub const fn new(data_type: DataType) -> Self {
Self {
data_type,
Expand All @@ -1796,61 +1803,72 @@ impl ArrayDataBuilder {
}
}

/// Consumes this builder and returns one whose data type is `data_type`
///
/// Every other builder field is carried over unchanged.
pub fn data_type(self, data_type: DataType) -> Self {
Self { data_type, ..self }
}

/// Sets the length of the [ArrayData]
#[inline]
// `len` here is a builder setter rather than a size query, so there is no
// `is_empty` counterpart for clippy to pair it with.
#[allow(clippy::len_without_is_empty)]
pub const fn len(mut self, n: usize) -> Self {
self.len = n;
self
}

/// Sets the null buffer of the [ArrayData]
///
/// Also clears any null count and null bit buffer previously set on this
/// builder.
pub fn nulls(mut self, nulls: Option<NullBuffer>) -> Self {
self.nulls = nulls;
self.null_count = None;
self.null_bit_buffer = None;
self
}

/// Sets the null count of the [ArrayData]
///
/// The count is stored as given; this method does not validate it.
pub fn null_count(mut self, null_count: usize) -> Self {
self.null_count = Some(null_count);
self
}

/// Sets the `null_bit_buffer` of the [ArrayData]
///
/// Also clears the builder's `nulls` field. A previously set null count is
/// left untouched.
pub fn null_bit_buffer(mut self, buf: Option<Buffer>) -> Self {
self.nulls = None;
self.null_bit_buffer = buf;
self
}

/// Sets the offset of the [ArrayData]
///
/// The value is stored as given; no bounds check happens in this method.
#[inline]
pub const fn offset(mut self, n: usize) -> Self {
self.offset = n;
self
}

/// Sets the buffers of the [ArrayData], replacing any buffers already held
/// by this builder
pub fn buffers(mut self, v: Vec<Buffer>) -> Self {
self.buffers = v;
self
}

/// Appends a single buffer to the end of the [ArrayData]'s buffers
pub fn add_buffer(mut self, b: Buffer) -> Self {
self.buffers.push(b);
self
}

pub fn add_buffers(mut self, bs: Vec<Buffer>) -> Self {
/// Adds multiple buffers to the [ArrayData]'s buffers
pub fn add_buffers<I: IntoIterator<Item = Buffer>>(mut self, bs: I) -> Self {
self.buffers.extend(bs);
self
}

/// Sets the child data of the [ArrayData], replacing any child data already
/// held by this builder
pub fn child_data(mut self, v: Vec<ArrayData>) -> Self {
self.child_data = v;
self
}

/// Adds a single child data to the [ArrayData]'s child data
pub fn add_child_data(mut self, r: ArrayData) -> Self {
self.child_data.push(r);
self
Expand All @@ -1873,22 +1891,25 @@ impl ArrayDataBuilder {

/// Same as [`Self::build_unchecked`] but ignoring `force_validate` feature flag
unsafe fn build_impl(self) -> ArrayData {
let nulls = self.nulls.or_else(|| {
let buffer = self.null_bit_buffer?;
let buffer = BooleanBuffer::new(buffer, self.offset, self.len);
Some(match self.null_count {
Some(n) => NullBuffer::new_unchecked(buffer, n),
None => NullBuffer::new(buffer),
let nulls = self
.nulls
.or_else(|| {
let buffer = self.null_bit_buffer?;
let buffer = BooleanBuffer::new(buffer, self.offset, self.len);
Some(match self.null_count {
Some(n) => NullBuffer::new_unchecked(buffer, n),
None => NullBuffer::new(buffer),
})
})
});
.filter(|b| b.null_count() != 0);

ArrayData {
data_type: self.data_type,
len: self.len,
offset: self.offset,
buffers: self.buffers,
child_data: self.child_data,
nulls: nulls.filter(|b| b.null_count() != 0),
nulls,
}
}

Expand Down
4 changes: 4 additions & 0 deletions arrow-data/src/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
// specific language governing permissions and limitations
// under the License.

//! Defines maximum and minimum values for `decimal256` and `decimal128` types for varying precisions.
//!
//! Also provides functions to validate whether a given decimal value is within the valid range of the decimal type.

use arrow_buffer::i256;
use arrow_schema::ArrowError;

Expand Down
1 change: 1 addition & 0 deletions arrow-data/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
//!
//! For a higher-level, strongly-typed interface see [arrow_array](https://docs.rs/arrow_array)

#![warn(missing_docs)]
mod data;
pub use data::*;

Expand Down
5 changes: 5 additions & 0 deletions arrow-data/src/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
// specific language governing permissions and limitations
// under the License.

//! Low-level array data abstractions.
//!
//! Provides utilities for creating, manipulating, and converting Arrow arrays
//! made of primitive types, strings, and nested types.

use super::{data::new_buffers, ArrayData, ArrayDataBuilder, ByteView};
use crate::bit_mask::set_bits;
use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};
Expand Down
2 changes: 1 addition & 1 deletion arrow-json/src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1007,7 +1007,7 @@ mod tests {
let map_values = map.values().as_list::<i32>();
assert_eq!(map.value_offsets(), &[0, 1, 3, 5]);

let k: Vec<_> = map_keys.iter().map(|x| x.unwrap()).collect();
let k: Vec<_> = map_keys.iter().flatten().collect();
assert_eq!(&k, &["a", "a", "b", "c", "a"]);

let list_values = map_values.values().as_string::<i32>();
Expand Down
1 change: 1 addition & 0 deletions arrow-ord/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
//! ```
//!

#![warn(missing_docs)]
pub mod cmp;
#[doc(hidden)]
pub mod comparison;
Expand Down
2 changes: 2 additions & 0 deletions arrow-ord/src/rank.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

//! Provides the `rank` function, which assigns a rank to each value in an array

use arrow_array::cast::AsArray;
use arrow_array::types::*;
use arrow_array::{downcast_primitive_array, Array, ArrowNativeTypeOp, GenericByteArray};
Expand Down
2 changes: 2 additions & 0 deletions arrow-ord/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,9 @@ where
/// One column to be used in a lexicographical sort
#[derive(Clone, Debug)]
pub struct SortColumn {
/// The array holding the values of the column to sort
pub values: ArrayRef,
/// Optional sort options for this column
pub options: Option<SortOptions>,
}

Expand Down
1 change: 1 addition & 0 deletions arrow-pyarrow-integration-testing/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
//! This library demonstrates a minimal usage of Rust's C data interface to pass
//! arrays from and to Python.

#![warn(missing_docs)]
use std::sync::Arc;

use arrow::array::new_empty_array;
Expand Down
1 change: 1 addition & 0 deletions arrow-row/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@
//! [compared]: PartialOrd
//! [compare]: PartialOrd

#![warn(missing_docs)]
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::sync::Arc;
Expand Down
4 changes: 3 additions & 1 deletion arrow-schema/src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,11 +420,13 @@ pub enum IntervalUnit {
MonthDayNano,
}

// Sparse or Dense union layouts
/// Sparse or Dense union layouts
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum UnionMode {
/// Sparse union layout
Sparse,
/// Dense union layout
Dense,
}

Expand Down
16 changes: 16 additions & 0 deletions arrow-schema/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,39 @@ use std::error::Error;
pub enum ArrowError {
/// Returned when functionality is not yet available.
NotYetImplemented(String),
/// Wraps an external error, boxed as a `Send + Sync` trait object.
ExternalError(Box<dyn Error + Send + Sync>),
/// Error during casting from one type to another.
CastError(String),
/// Memory or buffer error.
MemoryError(String),
/// Error during parsing from a string.
ParseError(String),
/// Error during schema-related operations.
SchemaError(String),
/// Error during computation.
ComputeError(String),
/// Error when a division by zero is attempted.
DivideByZero,
/// Error when an arithmetic operation overflows.
ArithmeticOverflow(String),
/// Error during CSV-related operations.
CsvError(String),
/// Error during JSON-related operations.
JsonError(String),
/// Error during IO operations; carries the underlying [`std::io::Error`]
/// alongside a `String`.
IoError(String, std::io::Error),
/// Error during IPC operations in `arrow-ipc` or `arrow-flight`.
IpcError(String),
/// Error indicating that an unexpected or bad argument was passed to a function.
InvalidArgumentError(String),
/// Error during Parquet operations.
ParquetError(String),
/// Error during import from or export to the C Data Interface.
CDataInterface(String),
/// Error when a dictionary key is too large for the key type.
DictionaryKeyOverflowError,
/// Error when the run end index in a run-end encoded (REE) array is bigger
/// than the array length.
RunEndIndexOverflowError,
}

Expand Down
Loading
Loading