-
Couldn't load subscription status.
- Fork 1k
Closed
Labels
Description
use std::sync::Arc;
use parquet::arrow::*;
use parquet::file::reader::SerializedFileReader;
use parquet::file::serialized_reader::SliceableCursor;
/// Reads the first record batch of a single projected column from an
/// in-memory parquet file and prints that batch's first column with `{:?}`.
///
/// `buffer` holds the raw bytes of the parquet file and `column` is the index
/// handed to `get_record_reader_by_columns` as the only projected column.
///
/// # Panics
///
/// Every fallible step is unwrapped, so this panics if the file reader cannot
/// be created, the record reader cannot be built, or no batch is produced.
fn read(buffer: Arc<Vec<u8>>, column: usize) {
    // Wrap the bytes in a cursor and open them as a serialized parquet file.
    let parquet_file = SerializedFileReader::new(SliceableCursor::new(buffer)).unwrap();

    // Project only the requested column; 10 is the second argument of
    // `get_record_reader_by_columns` (presumably the batch size -- confirm
    // against the parquet crate docs). Take the first batch it yields.
    let first_batch = ParquetFileArrowReader::new(Arc::new(parquet_file))
        .get_record_reader_by_columns(vec![column], 10)
        .unwrap()
        .next()
        .unwrap()
        .unwrap();

    // Debug-format column 0 of the batch. This is the statement the miri
    // backtrace in this report points at.
    println!("{:?}", first_batch.column(0));
}
// Entry point of the reproduction: builds the bytes of a small parquet file
// in memory and passes them to `read`, asking for column index 0.
fn main() {
// a parquet file with 1 column with invalid utf8
// The buffer starts and ends with 80, 65, 82, 49, which is ASCII "PAR1".
// The recurring run 104, 101, 255, 108, 111 is "he", the byte 0xFF, then "lo";
// 0xFF can never occur in well-formed UTF-8, so that value is not a valid string.
let data = vec![
80, 65, 82, 49, 21, 6, 21, 22, 21, 22, 92, 21, 2, 21, 0, 21, 2, 21, 0, 21, 4, 21,
0, 18, 28, 54, 0, 40, 5, 104, 101, 255, 108, 111, 24, 5, 104, 101, 255, 108, 111,
0, 0, 0, 3, 1, 5, 0, 0, 0, 104, 101, 255, 108, 111, 38, 110, 28, 21, 12, 25, 37,
6, 0, 25, 24, 2, 99, 49, 21, 0, 22, 2, 22, 102, 22, 102, 38, 8, 60, 54, 0, 40, 5,
104, 101, 255, 108, 111, 24, 5, 104, 101, 255, 108, 111, 0, 0, 0, 21, 4, 25, 44,
72, 4, 114, 111, 111, 116, 21, 2, 0, 21, 12, 37, 2, 24, 2, 99, 49, 37, 0, 76, 28,
0, 0, 0, 22, 2, 25, 28, 25, 28, 38, 110, 28, 21, 12, 25, 37, 6, 0, 25, 24, 2, 99,
49, 21, 0, 22, 2, 22, 102, 22, 102, 38, 8, 60, 54, 0, 40, 5, 104, 101, 255, 108,
111, 24, 5, 104, 101, 255, 108, 111, 0, 0, 0, 22, 102, 22, 2, 0, 40, 44, 65, 114,
114, 111, 119, 50, 32, 45, 32, 78, 97, 116, 105, 118, 101, 32, 82, 117, 115, 116,
32, 105, 109, 112, 108, 101, 109, 101, 110, 116, 97, 116, 105, 111, 110, 32, 111,
102, 32, 65, 114, 114, 111, 119, 0, 130, 0, 0, 0, 80, 65, 82, 49,
];
// Share the bytes behind an `Arc` (the type `read` takes) and decode column 0.
read(Arc::new(data), 0)
}miri output
cargo miri run --example unsafe
StringArray
[
error: Undefined Behavior: type validation failed: encountered 0x001ecbc0, but expected a valid unicode scalar value (in `0..=0x10FFFF` but not in `0xD800..=0xDFFF`)
--> /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/char/convert.rs:94:78
|
94 | if cfg!(debug_assertions) { char::from_u32(i).unwrap() } else { unsafe { transmute(i) } }
| ^^^^^^^^^^^^ type validation failed: encountered 0x001ecbc0, but expected a valid unicode scalar value (in `0..=0x10FFFF` but not in `0xD800..=0xDFFF`)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: inside `std::char::from_u32_unchecked` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/char/convert.rs:94:78
= note: inside closure at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/str/iter.rs:43:22
= note: inside `std::option::Option::<u32>::map::<char, [closure@<std::str::Chars as std::iter::Iterator>::next::{closure#0}]>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/option.rs:489:29
= note: inside `<std::str::Chars as std::iter::Iterator>::next` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/str/iter.rs:41:9
= note: inside `<std::str::CharIndices as std::iter::Iterator>::next` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/str/iter.rs:140:15
= note: inside `<str as std::fmt::Debug>::fmt` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:2079:23
= note: inside `<&str as std::fmt::Debug>::fmt` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:2033:62
= note: inside closure at /home/azureuser/projects/arrow-rs/arrow/src/array/array_string.rs:277:13
= note: inside `arrow::array::array::print_long_array::<arrow::array::array_string::GenericStringArray<i32>, [closure@<arrow::array::array_string::GenericStringArray<i32> as std::fmt::Debug>::fmt::{closure#0}]>` at /home/azureuser/projects/arrow-rs/arrow/src/array/array.rs:561:13
= note: inside `<arrow::array::array_string::GenericStringArray<i32> as std::fmt::Debug>::fmt` at /home/azureuser/projects/arrow-rs/arrow/src/array/array_string.rs:276:9
= note: inside `<std::sync::Arc<dyn arrow::array::array::Array> as std::fmt::Debug>::fmt` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/alloc/src/sync.rs:2266:9
= note: inside `<&std::sync::Arc<dyn arrow::array::array::Array> as std::fmt::Debug>::fmt` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:2033:62
= note: inside `std::fmt::write` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/fmt/mod.rs:1112:17
= note: inside `<std::io::StdoutLock as std::io::Write>::write_fmt` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/mod.rs:1640:15
= note: inside `<&std::io::Stdout as std::io::Write>::write_fmt` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:657:9
= note: inside `<std::io::Stdout as std::io::Write>::write_fmt` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:631:9
= note: inside `std::io::stdio::print_to::<std::io::Stdout>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:934:21
= note: inside `std::io::_print` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/io/stdio.rs:947:5
note: inside `read` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/macros.rs:97:9
--> parquet/examples/unsafe.rs:18:5
|
18 | println!("{:?}", batch.column(0));
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
note: inside `main` at parquet/examples/unsafe.rs:38:5
--> parquet/examples/unsafe.rs:38:5
|
38 | read(Arc::new(data), 0)
| ^^^^^^^^^^^^^^^^^^^^^^^
= note: inside `<fn() as std::ops::FnOnce<()>>::call_once - shim(fn())` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:227:5
= note: inside `std::sys_common::backtrace::__rust_begin_short_backtrace::<fn(), ()>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/sys_common/backtrace.rs:125:18
= note: inside closure at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:63:18
= note: inside `std::ops::function::impls::<impl std::ops::FnOnce<()> for &dyn std::ops::Fn() -> i32 + std::marker::Sync + std::panic::RefUnwindSafe>::call_once` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ops/function.rs:259:13
= note: inside `std::panicking::r#try::do_call::<&dyn std::ops::Fn() -> i32 + std::marker::Sync + std::panic::RefUnwindSafe, i32>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:401:40
= note: inside `std::panicking::r#try::<i32, &dyn std::ops::Fn() -> i32 + std::marker::Sync + std::panic::RefUnwindSafe>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:365:19
= note: inside `std::panic::catch_unwind::<&dyn std::ops::Fn() -> i32 + std::marker::Sync + std::panic::RefUnwindSafe, i32>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:434:14
= note: inside closure at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:45:48
= note: inside `std::panicking::r#try::do_call::<[closure@std::rt::lang_start_internal::{closure#2}], isize>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:401:40
= note: inside `std::panicking::r#try::<isize, [closure@std::rt::lang_start_internal::{closure#2}]>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panicking.rs:365:19
= note: inside `std::panic::catch_unwind::<[closure@std::rt::lang_start_internal::{closure#2}], isize>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/panic.rs:434:14
= note: inside `std::rt::lang_start_internal` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:45:20
= note: inside `std::rt::lang_start::<()>` at /home/azureuser/.rustup/toolchains/nightly-2021-07-03-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/std/src/rt.rs:62:5
= note: this error originates in the macro `println` (in Nightly builds, run with -Z macro-backtrace for more info)
error: aborting due to previous error
Dandandan