Skip to content

Commit 7f20f6e

Browse files
wesm authored and xhochy committed
ARROW-818: [Python] Expand Sphinx API docs, pyarrow.* namespace. Add factory functions for time32, time64
Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #557 from wesm/ARROW-818 and squashes the following commits:

96ce436 [Wes McKinney] Expand Sphinx API docs, pyarrow.* namespace. Add factory functions for time32, time64
1 parent bb287e2 commit 7f20f6e

File tree

8 files changed

+195
-25
lines changed

8 files changed

+195
-25
lines changed

python/doc/source/api.rst

Lines changed: 58 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ API Reference
2424

2525
.. _api.functions:
2626

27-
Type Metadata and Schemas
28-
-------------------------
27+
Type and Schema Factory Functions
28+
---------------------------------
2929

3030
.. autosummary::
3131
:toctree: generated/
@@ -43,6 +43,8 @@ Type Metadata and Schemas
4343
float16
4444
float32
4545
float64
46+
time32
47+
time64
4648
timestamp
4749
date32
4850
date64
@@ -53,10 +55,8 @@ Type Metadata and Schemas
5355
struct
5456
dictionary
5557
field
56-
DataType
57-
Field
58-
Schema
5958
schema
59+
from_numpy_dtype
6060

6161
Scalar Value Types
6262
------------------
@@ -68,6 +68,7 @@ Scalar Value Types
6868
NAType
6969
Scalar
7070
ArrayValue
71+
BooleanValue
7172
Int8Value
7273
Int16Value
7374
Int32Value
@@ -82,6 +83,11 @@ Scalar Value Types
8283
BinaryValue
8384
StringValue
8485
FixedSizeBinaryValue
86+
Date32Value
87+
Date64Value
88+
TimestampValue
89+
DecimalValue
90+
8591

8692
Array Types and Constructors
8793
----------------------------
@@ -91,31 +97,42 @@ Array Types and Constructors
9197

9298
array
9399
Array
94-
NullArray
95-
NumericArray
96-
IntegerArray
97-
FloatingPointArray
98100
BooleanArray
101+
DictionaryArray
102+
FloatingPointArray
103+
IntegerArray
99104
Int8Array
100105
Int16Array
101106
Int32Array
102107
Int64Array
108+
NullArray
109+
NumericArray
103110
UInt8Array
104111
UInt16Array
105112
UInt32Array
106113
UInt64Array
107-
DictionaryArray
114+
BinaryArray
115+
FixedSizeBinaryArray
108116
StringArray
117+
Time32Array
118+
Time64Array
119+
Date32Array
120+
Date64Array
121+
TimestampArray
122+
DecimalArray
123+
ListArray
109124

110125
Tables and Record Batches
111126
-------------------------
112127

113128
.. autosummary::
114129
:toctree: generated/
115130

131+
ChunkedArray
116132
Column
117133
RecordBatch
118134
Table
135+
get_record_batch_size
119136

120137
Tensor type and Functions
121138
-------------------------
@@ -141,7 +158,7 @@ Input / Output and Shared Memory
141158
MemoryMappedFile
142159
memory_map
143160
create_memory_map
144-
PythonFileInterface
161+
PythonFile
145162

146163
Interprocess Communication and Messaging
147164
----------------------------------------
@@ -165,3 +182,33 @@ Memory Pools
165182
jemalloc_memory_pool
166183
total_allocated_bytes
167184
set_memory_pool
185+
186+
Type Classes
187+
------------
188+
189+
.. autosummary::
190+
:toctree: generated/
191+
192+
DataType
193+
DecimalType
194+
DictionaryType
195+
FixedSizeBinaryType
196+
Time32Type
197+
Time64Type
198+
TimestampType
199+
Field
200+
Schema
201+
202+
.. currentmodule:: pyarrow.parquet
203+
204+
Apache Parquet
205+
--------------
206+
207+
.. autosummary::
208+
:toctree: generated/
209+
210+
ParquetDataset
211+
ParquetFile
212+
read_table
213+
write_metadata
214+
write_table

python/pyarrow/__init__.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,20 @@
3131
from pyarrow._array import (null, bool_,
3232
int8, int16, int32, int64,
3333
uint8, uint16, uint32, uint64,
34-
timestamp, date32, date64,
34+
time32, time64, timestamp, date32, date64,
3535
float16, float32, float64,
3636
binary, string, decimal,
3737
list_, struct, dictionary, field,
38-
DataType, FixedSizeBinaryType,
39-
Field, Schema, schema,
38+
DataType,
39+
DecimalType,
40+
DictionaryType,
41+
FixedSizeBinaryType,
42+
TimestampType,
43+
Time32Type,
44+
Time64Type,
45+
Field,
46+
Schema,
47+
schema,
4048
Array, Tensor,
4149
array,
4250
from_numpy_dtype,
@@ -47,25 +55,34 @@
4755
Int16Array, UInt16Array,
4856
Int32Array, UInt32Array,
4957
Int64Array, UInt64Array,
50-
ListArray, StringArray,
58+
ListArray,
59+
BinaryArray, StringArray,
60+
FixedSizeBinaryArray,
5161
DictionaryArray,
62+
Date32Array, Date64Array,
63+
TimestampArray, Time32Array, Time64Array,
64+
DecimalArray,
5265
ArrayValue, Scalar, NA, NAType,
5366
BooleanValue,
5467
Int8Value, Int16Value, Int32Value, Int64Value,
5568
UInt8Value, UInt16Value, UInt32Value, UInt64Value,
5669
FloatValue, DoubleValue, ListValue,
57-
BinaryValue, StringValue, FixedSizeBinaryValue)
70+
BinaryValue, StringValue, FixedSizeBinaryValue,
71+
DecimalValue,
72+
Date32Value, Date64Value, TimestampValue)
5873

59-
from pyarrow._io import (HdfsFile, NativeFile, PythonFileInterface,
74+
from pyarrow._io import (HdfsFile, NativeFile, PythonFile,
6075
Buffer, BufferReader, InMemoryOutputStream,
6176
OSFile, MemoryMappedFile, memory_map,
6277
frombuffer, read_tensor, write_tensor,
6378
memory_map, create_memory_map,
64-
get_record_batch_size, get_tensor_size)
79+
get_record_batch_size, get_tensor_size,
80+
have_libhdfs, have_libhdfs3)
6581

6682
from pyarrow._memory import (MemoryPool, total_allocated_bytes,
6783
set_memory_pool, default_memory_pool)
68-
from pyarrow._table import Column, RecordBatch, Table, concat_tables
84+
from pyarrow._table import (ChunkedArray, Column, RecordBatch, Table,
85+
concat_tables)
6986
from pyarrow._error import (ArrowException,
7087
ArrowKeyError,
7188
ArrowInvalid,

python/pyarrow/_array.pxd

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,16 @@ cdef class TimestampType(DataType):
4242
const CTimestampType* ts_type
4343

4444

45+
cdef class Time32Type(DataType):
46+
cdef:
47+
const CTime32Type* time_type
48+
49+
50+
cdef class Time64Type(DataType):
51+
cdef:
52+
const CTime64Type* time_type
53+
54+
4555
cdef class FixedSizeBinaryType(DataType):
4656
cdef:
4757
const CFixedSizeBinaryType* fixed_size_binary_type

python/pyarrow/_array.pyx

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,30 @@ cdef class TimestampType(DataType):
127127
return None
128128

129129

130+
cdef class Time32Type(DataType):
131+
132+
cdef void init(self, const shared_ptr[CDataType]& type):
133+
DataType.init(self, type)
134+
self.time_type = <const CTime32Type*> type.get()
135+
136+
property unit:
137+
138+
def __get__(self):
139+
return timeunit_to_string(self.time_type.unit())
140+
141+
142+
cdef class Time64Type(DataType):
143+
144+
cdef void init(self, const shared_ptr[CDataType]& type):
145+
DataType.init(self, type)
146+
self.time_type = <const CTime64Type*> type.get()
147+
148+
property unit:
149+
150+
def __get__(self):
151+
return timeunit_to_string(self.time_type.unit())
152+
153+
130154
cdef class FixedSizeBinaryType(DataType):
131155

132156
cdef void init(self, const shared_ptr[CDataType]& type):
@@ -342,6 +366,7 @@ def int64():
342366

343367

344368
cdef dict _timestamp_type_cache = {}
369+
cdef dict _time_type_cache = {}
345370

346371

347372
cdef timeunit_to_string(TimeUnit unit):
@@ -369,7 +394,7 @@ def timestamp(unit_str, tz=None):
369394
elif unit_str == 'ns':
370395
unit = TimeUnit_NANO
371396
else:
372-
raise TypeError('Invalid TimeUnit string')
397+
raise ValueError('Invalid TimeUnit string')
373398

374399
cdef TimestampType out = TimestampType()
375400

@@ -388,6 +413,50 @@ def timestamp(unit_str, tz=None):
388413
return out
389414

390415

416+
def time32(unit_str):
417+
cdef:
418+
TimeUnit unit
419+
c_string c_timezone
420+
421+
if unit_str == "s":
422+
unit = TimeUnit_SECOND
423+
elif unit_str == 'ms':
424+
unit = TimeUnit_MILLI
425+
else:
426+
raise ValueError('Invalid TimeUnit for time32: {}'.format(unit_str))
427+
428+
cdef Time32Type out
429+
if unit in _time_type_cache:
430+
return _time_type_cache[unit]
431+
else:
432+
out = Time32Type()
433+
out.init(ctime32(unit))
434+
_time_type_cache[unit] = out
435+
return out
436+
437+
438+
def time64(unit_str):
439+
cdef:
440+
TimeUnit unit
441+
c_string c_timezone
442+
443+
if unit_str == "us":
444+
unit = TimeUnit_MICRO
445+
elif unit_str == 'ns':
446+
unit = TimeUnit_NANO
447+
else:
448+
raise ValueError('Invalid TimeUnit for time64: {}'.format(unit_str))
449+
450+
cdef Time64Type out
451+
if unit in _time_type_cache:
452+
return _time_type_cache[unit]
453+
else:
454+
out = Time64Type()
455+
out.init(ctime64(unit))
456+
_time_type_cache[unit] = out
457+
return out
458+
459+
391460
def date32():
392461
return primitive_type(_Type_DATE32)
393462

@@ -516,6 +585,9 @@ cdef Schema box_schema(const shared_ptr[CSchema]& type):
516585

517586

518587
def from_numpy_dtype(object dtype):
588+
"""
589+
Convert NumPy dtype to pyarrow.DataType
590+
"""
519591
cdef shared_ptr[CDataType] c_type
520592
with nogil:
521593
check_status(pyarrow.NumPyDtypeToArrow(dtype, &c_type))

python/pyarrow/_io.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ cdef class NativeFile:
307307
# Python file-like objects
308308

309309

310-
cdef class PythonFileInterface(NativeFile):
310+
cdef class PythonFile(NativeFile):
311311
cdef:
312312
object handle
313313

@@ -600,7 +600,7 @@ cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader):
600600
source = BufferReader(source)
601601
elif not isinstance(source, NativeFile) and hasattr(source, 'read'):
602602
# Optimistically hope this is file-like
603-
source = PythonFileInterface(source, mode='r')
603+
source = PythonFile(source, mode='r')
604604

605605
if isinstance(source, NativeFile):
606606
nf = source
@@ -622,7 +622,7 @@ cdef get_writer(object source, shared_ptr[OutputStream]* writer):
622622
source = OSFile(source, mode='w')
623623
elif not isinstance(source, NativeFile) and hasattr(source, 'write'):
624624
# Optimistically hope this is file-like
625-
source = PythonFileInterface(source, mode='w')
625+
source = PythonFile(source, mode='w')
626626

627627
if isinstance(source, NativeFile):
628628
nf = source

python/pyarrow/includes/libarrow.pxd

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
106106
cdef cppclass CTime64Type" arrow::Time64Type"(CFixedWidthType):
107107
TimeUnit unit()
108108

109+
shared_ptr[CDataType] ctime32" arrow::time32"(TimeUnit unit)
110+
shared_ptr[CDataType] ctime64" arrow::time64"(TimeUnit unit)
111+
109112
cdef cppclass CDictionaryType" arrow::DictionaryType"(CFixedWidthType):
110113
CDictionaryType(const shared_ptr[CDataType]& index_type,
111114
const shared_ptr[CArray]& dictionary)

python/pyarrow/tests/test_io.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
def test_python_file_write():
3333
buf = BytesIO()
3434

35-
f = pa.PythonFileInterface(buf)
35+
f = pa.PythonFile(buf)
3636

3737
assert f.tell() == 0
3838

@@ -56,7 +56,7 @@ def test_python_file_read():
5656
data = b'some sample data'
5757

5858
buf = BytesIO(data)
59-
f = pa.PythonFileInterface(buf, mode='r')
59+
f = pa.PythonFile(buf, mode='r')
6060

6161
assert f.size() == len(data)
6262

0 commit comments

Comments
 (0)