Skip to content
This repository was archived by the owner on Oct 21, 2024. It is now read-only.

Commit 7142699

Browse files
wesm authored and kszucs committed
Gross hack to avoid unwanted nanosecond promotion with tz-aware timestamp types
1 parent 123d4b8 commit 7142699

File tree

3 files changed

+16
-4
lines changed

3 files changed

+16
-4
lines changed

cpp/src/arrow/python/arrow_to_pandas.cc

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -619,13 +619,18 @@ inline Status ConvertStruct(const PandasOptions& options, const ChunkedArray& da
619619
auto array_type = arr->type();
620620
std::vector<OwnedRef> fields_data(num_fields);
621621
OwnedRef dict_item;
622+
623+
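// XXX(wesm): In ARROW-7723, we found that, as a result of ARROW-3789, tz-aware timestamp struct fields were being promoted to nanoseconds; ignore the timezone for the field conversions below to avoid that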
624+
PandasOptions modified_options = options;
625+
modified_options.ignore_timezone = true;
626+
622627
for (int c = 0; c < data.num_chunks(); c++) {
623628
auto arr = checked_cast<const StructArray*>(data.chunk(c).get());
624629
// Convert the struct arrays first
625630
for (int32_t i = 0; i < num_fields; i++) {
626631
PyObject* numpy_array;
627-
RETURN_NOT_OK(ConvertArrayToPandas(options, arr->field(static_cast<int>(i)),
628-
nullptr, &numpy_array));
632+
RETURN_NOT_OK(ConvertArrayToPandas(
633+
modified_options, arr->field(static_cast<int>(i)), nullptr, &numpy_array));
629634
fields_data[i].reset(numpy_array);
630635
}
631636

@@ -1678,7 +1683,8 @@ static Status GetPandasWriterType(const ChunkedArray& data, const PandasOptions&
16781683
break;
16791684
case Type::TIMESTAMP: {
16801685
const auto& ts_type = checked_cast<const TimestampType&>(*data.type());
1681-
if (ts_type.timezone() != "") {
1686+
// XXX: Hack here for ARROW-7723
1687+
if (ts_type.timezone() != "" && !options.ignore_timezone) {
16821688
*output_type = PandasWriter::DATETIME_NANO_TZ;
16831689
} else if (options.coerce_temporal_nanoseconds) {
16841690
*output_type = PandasWriter::DATETIME_NANO;

cpp/src/arrow/python/arrow_to_pandas.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ struct PandasOptions {
5555
/// Coerce all date and timestamp to datetime64[ns]
5656
bool coerce_temporal_nanoseconds = false;
5757

58+
/// XXX(wesm): Hack for ARROW-7723 to opt out of DATETIME_NANO_TZ conversion
59+
/// path
60+
bool ignore_timezone = false;
61+
5862
/// \brief If true, do not create duplicate PyObject versions of equal
5963
/// objects. This only applies to immutable objects like strings or datetime
6064
/// objects

python/pyarrow/tests/test_pandas.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3215,7 +3215,9 @@ def test_struct_with_timestamp_tz():
32153215
# ARROW-7723
32163216
ts = pd.Timestamp.now()
32173217

3218-
for unit in ['s', 'ms', 'us', 'ns']:
3218+
# XXX: Ensure that this data does not get promoted to nanoseconds (and thus
3219+
# integers) to preserve behavior in 0.15.1
3220+
for unit in ['s', 'ms', 'us']:
32193221
arr = pa.array([ts], type=pa.timestamp(unit))
32203222
arr2 = pa.array([ts], type=pa.timestamp(unit, tz='America/New_York'))
32213223

0 commit comments

Comments
 (0)