From 905c46be1a8b1605c7bbd44392fea2bf4182eb01 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 15 Apr 2024 07:18:42 -0400 Subject: [PATCH] PrettyPrint support for `StringViewArray` and `BinaryViewArray` (#5634) --- arrow-cast/src/display.rs | 19 ++++++++ arrow-cast/src/pretty.rs | 98 +++++++++++++++++++++++++++++++++++---- 2 files changed, 109 insertions(+), 8 deletions(-) diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs index 9ec12f6e63d2..a5f69b660944 100644 --- a/arrow-cast/src/display.rs +++ b/arrow-cast/src/display.rs @@ -282,7 +282,9 @@ fn make_formatter<'a>( DataType::Boolean => array_format(as_boolean_array(array), options), DataType::Utf8 => array_format(array.as_string::(), options), DataType::LargeUtf8 => array_format(array.as_string::(), options), + DataType::Utf8View => array_format(array.as_string_view(), options), DataType::Binary => array_format(array.as_binary::(), options), + DataType::BinaryView => array_format(array.as_binary_view(), options), DataType::LargeBinary => array_format(array.as_binary::(), options), DataType::FixedSizeBinary(_) => { let a = array.as_any().downcast_ref::().unwrap(); @@ -733,6 +735,13 @@ impl<'a, O: OffsetSizeTrait> DisplayIndex for &'a GenericStringArray { } } +impl<'a> DisplayIndex for &'a StringViewArray { + fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { + write!(f, "{}", self.value(idx))?; + Ok(()) + } +} + impl<'a, O: OffsetSizeTrait> DisplayIndex for &'a GenericBinaryArray { fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { let v = self.value(idx); @@ -743,6 +752,16 @@ impl<'a, O: OffsetSizeTrait> DisplayIndex for &'a GenericBinaryArray { } } +impl<'a> DisplayIndex for &'a BinaryViewArray { + fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { + let v = self.value(idx); + for byte in v { + write!(f, "{byte:02x}")?; + } + Ok(()) + } +} + impl<'a> DisplayIndex for &'a FixedSizeBinaryArray { fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { let v = self.value(idx); diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index 550afa9f739d..da7c5e9bb6b4 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -18,11 +18,14 @@ //! Utilities for pretty printing record batches. Note this module is not //! available unless `feature = "prettyprint"` is enabled. -use crate::display::{ArrayFormatter, FormatOptions}; +use std::fmt::Display; + +use comfy_table::{Cell, Table}; + use arrow_array::{Array, ArrayRef, RecordBatch}; use arrow_schema::ArrowError; -use comfy_table::{Cell, Table}; -use std::fmt::Display; + +use crate::display::{ArrayFormatter, FormatOptions}; /// Create a visual representation of record batches pub fn pretty_format_batches(results: &[RecordBatch]) -> Result { @@ -131,17 +134,20 @@ fn create_column( #[cfg(test)] mod tests { + use std::fmt::Write; + use std::sync::Arc; + + use half::f16; - use super::*; - use crate::display::array_value_to_string; use arrow_array::builder::*; use arrow_array::types::*; use arrow_array::*; use arrow_buffer::Buffer; use arrow_schema::*; - use half::f16; - use std::fmt::Write; - use std::sync::Arc; + + use crate::display::array_value_to_string; + + use super::*; #[test] fn test_pretty_format_batches() { @@ -317,6 +323,82 @@ mod tests { assert_eq!(expected, actual, "Actual result:\n{table}"); } + #[test] + fn test_pretty_format_string_view() { + let schema = Arc::new(Schema::new(vec![Field::new( + "d1", + DataType::Utf8View, + true, + )])); + + // Use a small capacity so we end up with multiple views + let mut builder = StringViewBuilder::with_capacity(20); + builder.append_value("hello"); + builder.append_null(); + builder.append_value("longer than 12 bytes"); + builder.append_value("another than 12 bytes"); + builder.append_null(); + builder.append_value("small"); + + let array: ArrayRef = Arc::new(builder.finish()); + let batch = RecordBatch::try_new(schema, vec![array]).unwrap(); + let table = pretty_format_batches(&[batch]).unwrap().to_string(); + let expected = vec![ + "+-----------------------+", + "| d1 |", + "+-----------------------+", + "| hello |", + "| |", + "| longer than 12 bytes |", + "| another than 12 bytes |", + "| |", + "| small |", + "+-----------------------+", + ]; + + let actual: Vec<&str> = table.lines().collect(); + + assert_eq!(expected, actual, "Actual result:\n{table:#?}"); + } + + #[test] + fn test_pretty_format_binary_view() { + let schema = Arc::new(Schema::new(vec![Field::new( + "d1", + DataType::BinaryView, + true, + )])); + + // Use a small capacity so we end up with multiple views + let mut builder = BinaryViewBuilder::with_capacity(20); + builder.append_value(b"hello"); + builder.append_null(); + builder.append_value(b"longer than 12 bytes"); + builder.append_value(b"another than 12 bytes"); + builder.append_null(); + builder.append_value(b"small"); + + let array: ArrayRef = Arc::new(builder.finish()); + let batch = RecordBatch::try_new(schema, vec![array]).unwrap(); + let table = pretty_format_batches(&[batch]).unwrap().to_string(); + let expected = vec![ + "+--------------------------------------------+", + "| d1 |", + "+--------------------------------------------+", + "| 68656c6c6f |", + "| |", + "| 6c6f6e676572207468616e203132206279746573 |", + "| 616e6f74686572207468616e203132206279746573 |", + "| |", + "| 736d616c6c |", + "+--------------------------------------------+", + ]; + + let actual: Vec<&str> = table.lines().collect(); + + assert_eq!(expected, actual, "Actual result:\n\n{table:#?}"); + } + #[test] fn test_pretty_format_fixed_size_binary() { // define a schema.