Skip to content

Commit 1291274

Browse files
sunchao, kszucs
authored and committed
ARROW-4137: [Rust] Move parquet code into a separate crate
This moves the parquet-related code into a separate sub-crate that depends on arrow.
Author: Chao Sun <sunchao@apache.org>
Author: Kouhei Sutou <kou@clear-code.com>
Closes #3291 from sunchao/ARROW-4137 and squashes the following commits:
b2bcc1c <Kouhei Sutou> Add support for version update on release process
bbeaaba <Chao Sun> Fix rustfmt
0545fd9 <Chao Sun> ARROW-4137: Move parquet code into a separate crate
1 parent 9376d85 commit 1291274

40 files changed

+199
-162
lines changed

dev/release/00-prepare.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,9 @@ update_versions() {
100100
cd "${SOURCE_DIR}/../../rust"
101101
sed -i.bak -r -e \
102102
"s/^version = \".+\"/version = \"${version}\"/g" \
103-
Cargo.toml
104-
rm -f Cargo.toml.bak
105-
git add Cargo.toml
103+
Cargo.toml parquet/Cargo.toml
104+
rm -f Cargo.toml.bak parquet/Cargo.toml.bak
105+
git add Cargo.toml parquet/Cargo.toml
106106
cd -
107107
}
108108

rust/Cargo.toml

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
[package]
1919
name = "arrow"
20-
version = "0.11.0"
20+
version = "0.12.0-SNAPSHOT"
2121
description = "Rust implementation of Apache Arrow"
2222
homepage = "https://github.com/apache/arrow"
2323
repository = "https://github.com/apache/arrow"
@@ -42,17 +42,6 @@ serde_derive = "1.0.80"
4242
serde_json = "1.0.13"
4343
rand = "0.5"
4444
csv = "1.0.0"
45-
parquet-format = "2.5.0"
46-
quick-error = "1.2.2"
47-
byteorder = "1"
48-
thrift = "0.0.4"
49-
snap = "0.2"
50-
brotli = "2.5"
51-
flate2 = "1.0.2"
52-
lz4 = "1.23"
53-
zstd = "0.4"
54-
chrono = "0.4"
55-
num-bigint = "0.2"
5645
num = "0.2"
5746

5847
[dev-dependencies]
@@ -66,3 +55,6 @@ harness = false
6655
[[bench]]
6756
name = "builder"
6857
harness = false
58+
59+
[workspace]
60+
members = ["parquet"]

rust/parquet/Cargo.toml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
[package]
19+
name = "parquet"
20+
version = "0.12.0-SNAPSHOT"
21+
license = "Apache-2.0"
22+
description = "Apache Parquet implementation in Rust"
23+
authors = ["Apache Arrow <dev@arrow.apache.org>"]
24+
keywords = [ "arrow", "parquet", "hadoop" ]
25+
readme = "README.md"
26+
build = "build.rs"
27+
edition = "2018"
28+
29+
[dependencies]
30+
parquet-format = "2.5.0"
31+
quick-error = "1.2.2"
32+
byteorder = "1"
33+
thrift = "0.0.4"
34+
snap = "0.2"
35+
brotli = "2.5"
36+
flate2 = "1.0.2"
37+
lz4 = "1.23"
38+
zstd = "0.4"
39+
chrono = "0.4"
40+
num-bigint = "0.2"
41+
arrow = { path = ".." }
42+
43+
[dev-dependencies]
44+
lazy_static = "1"
45+
rand = "0.5"
File renamed without changes.

rust/src/parquet/basic.rs renamed to rust/parquet/src/basic.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use std::{convert, fmt, result, str};
2222

2323
use parquet_format as parquet;
2424

25-
use crate::parquet::errors::ParquetError;
25+
use crate::errors::ParquetError;
2626

2727
// ----------------------------------------------------------------------
2828
// Types from the Thrift definition

rust/src/parquet/column/mod.rs renamed to rust/parquet/src/column/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
//! ```rust,no_run
3939
//! use std::{fs, path::Path, rc::Rc};
4040
//!
41-
//! use arrow::parquet::{
41+
//! use parquet::{
4242
//! column::{reader::ColumnReader, writer::ColumnWriter},
4343
//! file::{
4444
//! properties::WriterProperties,

rust/src/parquet/column/page.rs renamed to rust/parquet/src/column/page.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717

1818
//! Contains Parquet Page definitions and page reader interface.
1919
20-
use crate::parquet::basic::{Encoding, PageType};
21-
use crate::parquet::errors::Result;
22-
use crate::parquet::file::{metadata::ColumnChunkMetaData, statistics::Statistics};
23-
use crate::parquet::util::memory::ByteBufferPtr;
20+
use crate::basic::{Encoding, PageType};
21+
use crate::errors::Result;
22+
use crate::file::{metadata::ColumnChunkMetaData, statistics::Statistics};
23+
use crate::util::memory::ByteBufferPtr;
2424

2525
/// Parquet Page definition.
2626
///

rust/src/parquet/column/reader.rs renamed to rust/parquet/src/column/reader.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,15 @@ use std::{
2424
};
2525

2626
use super::page::{Page, PageReader};
27-
use crate::parquet::basic::*;
28-
use crate::parquet::data_type::*;
29-
use crate::parquet::encodings::{
27+
use crate::basic::*;
28+
use crate::data_type::*;
29+
use crate::encodings::{
3030
decoding::{get_decoder, Decoder, DictDecoder, PlainDecoder},
3131
levels::LevelDecoder,
3232
};
33-
use crate::parquet::errors::{ParquetError, Result};
34-
use crate::parquet::schema::types::ColumnDescPtr;
35-
use crate::parquet::util::memory::ByteBufferPtr;
33+
use crate::errors::{ParquetError, Result};
34+
use crate::schema::types::ColumnDescPtr;
35+
use crate::util::memory::ByteBufferPtr;
3636

3737
/// Column reader for a Parquet type.
3838
pub enum ColumnReader {
@@ -490,14 +490,14 @@ mod tests {
490490
use rand::distributions::range::SampleRange;
491491
use std::{collections::VecDeque, rc::Rc, vec::IntoIter};
492492

493-
use crate::parquet::basic::Type as PhysicalType;
494-
use crate::parquet::column::page::Page;
495-
use crate::parquet::encodings::{
493+
use crate::basic::Type as PhysicalType;
494+
use crate::column::page::Page;
495+
use crate::encodings::{
496496
encoding::{get_encoder, DictEncoder, Encoder},
497497
levels::{max_buffer_size, LevelEncoder},
498498
};
499-
use crate::parquet::schema::types::{ColumnDescriptor, ColumnPath, Type as SchemaType};
500-
use crate::parquet::util::{
499+
use crate::schema::types::{ColumnDescriptor, ColumnPath, Type as SchemaType};
500+
use crate::util::{
501501
memory::{ByteBufferPtr, MemTracker, MemTrackerPtr},
502502
test_common::random_numbers_range,
503503
};

rust/src/parquet/column/writer.rs renamed to rust/parquet/src/column/writer.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,21 @@
1919
2020
use std::{cmp, collections::VecDeque, mem, rc::Rc};
2121

22-
use crate::parquet::basic::{Compression, Encoding, PageType, Type};
23-
use crate::parquet::column::page::{CompressedPage, Page, PageWriteSpec, PageWriter};
24-
use crate::parquet::compression::{create_codec, Codec};
25-
use crate::parquet::data_type::*;
26-
use crate::parquet::encodings::{
22+
use crate::basic::{Compression, Encoding, PageType, Type};
23+
use crate::column::page::{CompressedPage, Page, PageWriteSpec, PageWriter};
24+
use crate::compression::{create_codec, Codec};
25+
use crate::data_type::*;
26+
use crate::encodings::{
2727
encoding::{get_encoder, DictEncoder, Encoder},
2828
levels::{max_buffer_size, LevelEncoder},
2929
};
30-
use crate::parquet::errors::{ParquetError, Result};
31-
use crate::parquet::file::{
30+
use crate::errors::{ParquetError, Result};
31+
use crate::file::{
3232
metadata::ColumnChunkMetaData,
3333
properties::{WriterProperties, WriterPropertiesPtr, WriterVersion},
3434
};
35-
use crate::parquet::schema::types::ColumnDescPtr;
36-
use crate::parquet::util::memory::{ByteBufferPtr, MemTracker};
35+
use crate::schema::types::ColumnDescPtr;
36+
use crate::util::memory::{ByteBufferPtr, MemTracker};
3737

3838
/// Column writer for a Parquet type.
3939
pub enum ColumnWriter {
@@ -802,15 +802,15 @@ mod tests {
802802

803803
use rand::distributions::range::SampleRange;
804804

805-
use crate::parquet::column::{
805+
use crate::column::{
806806
page::PageReader,
807807
reader::{get_column_reader, get_typed_column_reader, ColumnReaderImpl},
808808
};
809-
use crate::parquet::file::{
809+
use crate::file::{
810810
properties::WriterProperties, reader::SerializedPageReader, writer::SerializedPageWriter,
811811
};
812-
use crate::parquet::schema::types::{ColumnDescriptor, ColumnPath, Type as SchemaType};
813-
use crate::parquet::util::{
812+
use crate::schema::types::{ColumnDescriptor, ColumnPath, Type as SchemaType};
813+
use crate::util::{
814814
io::{FileSink, FileSource},
815815
test_common::{get_temp_file, random_numbers_range},
816816
};

rust/src/parquet/compression.rs renamed to rust/parquet/src/compression.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
//! # Example
2424
//!
2525
//! ```rust
26-
//! use arrow::parquet::{basic::Compression, compression::create_codec};
26+
//! use parquet::{basic::Compression, compression::create_codec};
2727
//!
2828
//! let mut codec = match create_codec(Compression::SNAPPY) {
2929
//! Ok(Some(codec)) => codec,
@@ -48,8 +48,8 @@ use lz4;
4848
use snap::{decompress_len, max_compress_len, Decoder, Encoder};
4949
use zstd;
5050

51-
use crate::parquet::basic::Compression as CodecType;
52-
use crate::parquet::errors::{ParquetError, Result};
51+
use crate::basic::Compression as CodecType;
52+
use crate::errors::{ParquetError, Result};
5353

5454
/// Parquet compression codec interface.
5555
pub trait Codec {
@@ -250,7 +250,7 @@ impl Codec for ZSTDCodec {
250250
mod tests {
251251
use super::*;
252252

253-
use crate::parquet::util::test_common::*;
253+
use crate::util::test_common::*;
254254

255255
fn test_roundtrip(c: CodecType, data: &Vec<u8>) {
256256
let mut c1 = create_codec(c).unwrap().unwrap();

0 commit comments

Comments
 (0)