-
Notifications
You must be signed in to change notification settings - Fork 996
Closed
Labels
questionFurther information is requestedFurther information is requested
Description
Consider the following code:
/// Builds the two-column Arrow schema used by the examples below.
///
/// Columns (both non-nullable):
/// * `measurement_time` — microsecond timestamp whose timezone is the string `"UTC"`.
/// * `value` — `Float64`.
fn arrow_schema() -> arrow::datatypes::Schema {
    // Timestamp column: the timezone is given by name ("UTC"), not as an offset.
    let measurement_time = arrow::datatypes::Field::new(
        "measurement_time",
        arrow::datatypes::DataType::Timestamp(
            arrow::datatypes::TimeUnit::Microsecond,
            Some("UTC".into()),
        ),
        false,
    );
    let value = arrow::datatypes::Field::new("value", arrow::datatypes::DataType::Float64, false);

    arrow::datatypes::Schema::new(vec![measurement_time, value])
}
/// Builds a `RecordBatch` of 10,000 synthetic readings.
///
/// Reading `k` is timestamped `start_date + k * 900` seconds and carries a
/// random `f64` value. The timestamp column is tagged with
/// `with_timezone_utc()`.
///
/// # Panics
///
/// Panics if `RecordBatch::try_new` returns an error, because the result is
/// unwrapped.
pub fn create_random_readings(start_date: DateTime<Utc>) -> RecordBatch {
    const READING_COUNT: i64 = 10000;
    const STEP_SECONDS: i64 = 900;

    // Microsecond epoch timestamps, one per reading.
    let timestamps: Vec<i64> = (0..READING_COUNT)
        .map(|step| start_date + chrono::Duration::seconds(step * STEP_SECONDS))
        .map(|instant| instant.timestamp_micros())
        .collect();
    let readings: Vec<f64> = (0..READING_COUNT)
        .map(|_| rand::random::<f64>())
        .collect();

    let time_column =
        arrow::array::TimestampMicrosecondArray::from(timestamps).with_timezone_utc();
    let value_column = arrow::array::Float64Array::from(readings);

    RecordBatch::try_new(
        Arc::new(arrow_schema()),
        vec![Arc::new(time_column), Arc::new(value_column)],
    )
    .unwrap()
}
Running create_random_readings
would fail with the following error:
InvalidArgumentError("column types must match schema types, expected Timestamp(Microsecond, Some(\"UTC\")) but found Timestamp(Microsecond, Some(\"+00:00\"))")
If we look inside the implementation of with_timezone_utc
, we notice the following:
/// Construct a timestamp array with UTC
///
/// Consumes `self` and delegates to `with_timezone`, passing the
/// fixed-offset string `"+00:00"` rather than the timezone name `"UTC"`.
pub fn with_timezone_utc(self) -> Self {
self.with_timezone("+00:00")
}
I wonder whether with_timezone_utc
should be adjusted to set the timezone to "UTC"
instead. From the fantastic documentation that Andrew added a while ago (huge thanks for that), I would tend to think that the timezone part of Timestamp
should include timezones rather than offsets.
The following code works fine btw:
/// Builds a `RecordBatch` of 10,000 synthetic readings.
///
/// Reading `k` is timestamped `start_date + k * 900` seconds and carries a
/// random `f64` value. The timestamp column is tagged with the timezone
/// string `"UTC"` via `with_timezone("UTC")`.
///
/// # Panics
///
/// Panics if `RecordBatch::try_new` returns an error, because the result is
/// unwrapped.
pub fn create_random_readings(start_date: DateTime<Utc>) -> RecordBatch {
    const READING_COUNT: i64 = 10000;
    const STEP_SECONDS: i64 = 900;

    // Microsecond epoch timestamps, one per reading.
    let timestamps: Vec<i64> = (0..READING_COUNT)
        .map(|step| start_date + chrono::Duration::seconds(step * STEP_SECONDS))
        .map(|instant| instant.timestamp_micros())
        .collect();
    let readings: Vec<f64> = (0..READING_COUNT)
        .map(|_| rand::random::<f64>())
        .collect();

    let time_column =
        arrow::array::TimestampMicrosecondArray::from(timestamps).with_timezone("UTC");
    let value_column = arrow::array::Float64Array::from(readings);

    RecordBatch::try_new(
        Arc::new(arrow_schema()),
        vec![Arc::new(time_column), Arc::new(value_column)],
    )
    .unwrap()
}
Not sure if it's considered a bug or an enhancement request, which is why I started with a blank issue
Metadata
Metadata
Assignees
Labels
questionFurther information is requestedFurther information is requested