Skip to content

Add TimeMillis and TimeMicros fields and conversions for the record API #7544

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 57 additions & 2 deletions parquet/src/record/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,12 @@ pub enum Field {
/// Date without a time of day, stores the number of days from the
/// Unix epoch, 1 January 1970.
Date(i32),

/// The total number of milliseconds since midnight.
TimeMillis(i32),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think adding these new variants means this is an API change and thus must wait for our next breaking release (in July)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fine, I expected as much.

What's the process here? Do we just let this sit around until July? There doesn't seem to be a branch for stable (55) or for the next major release (56).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, we'll just leave the PR around until main opens for 56. That will probably be June sometime.

We could also potentially make a new branch to merge all the queued changes as well, but we haven't done that historically

/// The total number of microseconds since midnight.
TimeMicros(i64),

/// Milliseconds from the Unix epoch, 1 January 1970.
TimestampMillis(i64),
/// Microseconds from the Unix epoch, 1 January 1970.
Expand Down Expand Up @@ -638,6 +644,8 @@ impl Field {
Field::Date(_) => "Date",
Field::Str(_) => "Str",
Field::Bytes(_) => "Bytes",
Field::TimeMillis(_) => "TimeMillis",
Field::TimeMicros(_) => "TimeMicros",
Field::TimestampMillis(_) => "TimestampMillis",
Field::TimestampMicros(_) => "TimestampMicros",
Field::Group(_) => "Group",
Expand Down Expand Up @@ -671,7 +679,7 @@ impl Field {
ConvertedType::UINT_16 => Field::UShort(value as u16),
ConvertedType::UINT_32 => Field::UInt(value as u32),
ConvertedType::DATE => Field::Date(value),
ConvertedType::TIME_MILLIS => Field::TimestampMillis(value as i64),
ConvertedType::TIME_MILLIS => Field::TimeMillis(value),
ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i32(
value,
descr.type_precision(),
Expand All @@ -687,6 +695,7 @@ impl Field {
match descr.converted_type() {
ConvertedType::INT_64 | ConvertedType::NONE => Field::Long(value),
ConvertedType::UINT_64 => Field::ULong(value as u64),
ConvertedType::TIME_MICROS => Field::TimeMicros(value),
ConvertedType::TIMESTAMP_MILLIS => Field::TimestampMillis(value),
ConvertedType::TIMESTAMP_MICROS => Field::TimestampMicros(value),
ConvertedType::DECIMAL => Field::Decimal(Decimal::from_i64(
Expand Down Expand Up @@ -795,6 +804,8 @@ impl Field {
Field::Str(s) => Value::String(s.to_owned()),
Field::Bytes(b) => Value::String(BASE64_STANDARD.encode(b.data())),
Field::Date(d) => Value::String(convert_date_to_string(*d)),
Field::TimeMillis(t) => Value::String(convert_time_millis_to_string(*t)),
Field::TimeMicros(t) => Value::String(convert_time_micros_to_string(*t)),
Field::TimestampMillis(ts) => Value::String(convert_timestamp_millis_to_string(*ts)),
Field::TimestampMicros(ts) => Value::String(convert_timestamp_micros_to_string(*ts)),
Field::Group(row) => row.to_json_value(),
Expand Down Expand Up @@ -864,6 +875,12 @@ impl fmt::Display for Field {
Field::Str(ref value) => write!(f, "\"{value}\""),
Field::Bytes(ref value) => write!(f, "{:?}", value.data()),
Field::Date(value) => write!(f, "{}", convert_date_to_string(value)),
Field::TimeMillis(value) => {
write!(f, "{}", convert_time_millis_to_string(value))
}
Field::TimeMicros(value) => {
write!(f, "{}", convert_time_micros_to_string(value))
}
Field::TimestampMillis(value) => {
write!(f, "{}", convert_timestamp_millis_to_string(value))
}
Expand Down Expand Up @@ -936,6 +953,32 @@ fn convert_timestamp_micros_to_string(value: i64) -> String {
convert_timestamp_secs_to_string(value / 1000000)
}

/// Helper method to convert Parquet time (milliseconds since midnight) into a string.
/// Input `value` is a number of milliseconds since midnight.
/// Time is displayed in HH:MM:SS.sss format.
///
/// The hour component is not wrapped at 24, so inputs of a day or more print
/// an hour count of 24 or higher. A negative `value` is formatted from its
/// magnitude with a leading `-` (for example `-1` becomes `-00:00:00.001`)
/// rather than being reinterpreted as a huge unsigned number.
#[inline]
fn convert_time_millis_to_string(value: i32) -> String {
    const MILLIS_PER_SECOND: u32 = 1000;
    const MILLIS_PER_MINUTE: u32 = 60 * MILLIS_PER_SECOND;
    const MILLIS_PER_HOUR: u32 = 60 * MILLIS_PER_MINUTE;

    // `unsigned_abs` is total over `i32` (including `i32::MIN`), unlike a
    // plain `as u64` cast, which sign-extends negative inputs.
    let sign = if value < 0 { "-" } else { "" };
    let total_ms = value.unsigned_abs();

    let hours = total_ms / MILLIS_PER_HOUR;
    let minutes = (total_ms % MILLIS_PER_HOUR) / MILLIS_PER_MINUTE;
    let seconds = (total_ms % MILLIS_PER_MINUTE) / MILLIS_PER_SECOND;
    let millis = total_ms % MILLIS_PER_SECOND;
    format!("{sign}{hours:02}:{minutes:02}:{seconds:02}.{millis:03}")
}

/// Helper method to convert Parquet time (microseconds since midnight) into a string.
/// Input `value` is a number of microseconds since midnight.
/// Time is displayed in HH:MM:SS.ssssss format.
///
/// The hour component is not wrapped at 24, so inputs of a day or more print
/// an hour count of 24 or higher. A negative `value` is formatted from its
/// magnitude with a leading `-` (for example `-1` becomes `-00:00:00.000001`)
/// rather than being reinterpreted as a huge unsigned number.
#[inline]
fn convert_time_micros_to_string(value: i64) -> String {
    const MICROS_PER_SECOND: u64 = 1000 * 1000;
    const MICROS_PER_MINUTE: u64 = 60 * MICROS_PER_SECOND;
    const MICROS_PER_HOUR: u64 = 60 * MICROS_PER_MINUTE;

    // `unsigned_abs` is total over `i64` (including `i64::MIN`), unlike a
    // plain `as u64` cast, which wraps negative inputs around.
    let sign = if value < 0 { "-" } else { "" };
    let total_us = value.unsigned_abs();

    let hours = total_us / MICROS_PER_HOUR;
    let minutes = (total_us % MICROS_PER_HOUR) / MICROS_PER_MINUTE;
    let seconds = (total_us % MICROS_PER_MINUTE) / MICROS_PER_SECOND;
    let micros = total_us % MICROS_PER_SECOND;
    format!("{sign}{hours:02}:{minutes:02}:{seconds:02}.{micros:06}")
}

/// Helper method to convert Parquet decimal into a string.
/// We assert that `scale >= 0` and `precision > scale`, but this will be enforced
/// when constructing Parquet schema.
Expand Down Expand Up @@ -1057,7 +1100,7 @@ mod tests {

let descr = make_column_descr![PhysicalType::INT32, ConvertedType::TIME_MILLIS];
let row = Field::convert_int32(&descr, 14611);
assert_eq!(row, Field::TimestampMillis(14611));
assert_eq!(row, Field::TimeMillis(14611));

let descr = make_column_descr![PhysicalType::INT32, ConvertedType::DECIMAL, 0, 8, 2];
let row = Field::convert_int32(&descr, 444);
Expand All @@ -1082,6 +1125,10 @@ mod tests {
let row = Field::convert_int64(&descr, 1541186529153123);
assert_eq!(row, Field::TimestampMicros(1541186529153123));

let descr = make_column_descr![PhysicalType::INT64, ConvertedType::TIME_MICROS];
let row = Field::convert_int64(&descr, 47445123456);
assert_eq!(row, Field::TimeMicros(47445123456));

let descr = make_column_descr![PhysicalType::INT64, ConvertedType::NONE];
let row = Field::convert_int64(&descr, 2222);
assert_eq!(row, Field::Long(2222));
Expand Down Expand Up @@ -1959,6 +2006,14 @@ mod tests {
Field::TimestampMicros(12345678901).to_json_value(),
Value::String(convert_timestamp_micros_to_string(12345678901))
);
assert_eq!(
Field::TimeMillis(47445123).to_json_value(),
Value::String(String::from("13:10:45.123"))
);
assert_eq!(
Field::TimeMicros(47445123456).to_json_value(),
Value::String(String::from("13:10:45.123456"))
);

let fields = vec![
("X".to_string(), Field::Int(1)),
Expand Down
Loading