Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
2124 commits
Select commit Hold shift + click to select a range
0a3bdba
ARROW-2287: [Python] chunked array not iterable, not indexable
kszucs Jun 7, 2018
b165e46
ARROW-2675: Fix build error with clang-10 (Apple Clang / LLVM)
songqing Jun 7, 2018
e82a34a
ARROW-530: [C++/Python] Provide subpools for better memory allocation …
rok Jun 7, 2018
d411871
guard against reading data from 0-length buffer
trxcllnt Jun 8, 2018
dffd193
ARROW-2672: [Python] Build ORC extension in manylinux1 wheels
xhochy Jun 8, 2018
f0a8811
ARROW-2682: [CI] Notify in Slack about broken builds
xhochy Jun 8, 2018
27b869a
ARROW-2649: [C++] Add GenerateBits() function to improve bitmap writi…
pitrou Jun 8, 2018
5b4728b
ARROW-2635: [Ruby] Add LICENSE.txt and NOTICE.txt for Apache Arrow Ruby
kou Jun 8, 2018
3155291
ARROW-2680: [Python] Add documentation about type inference in Table.…
xhochy Jun 9, 2018
481d15a
guard against reading data from 0-length buffer
trxcllnt Jun 8, 2018
d5c5f39
[JS] Add note about IPC format to readme (#2122)
domoritz Jun 9, 2018
8156e25
ARROW-2551: [Plasma] Improve notification logic
zhijunfu Jun 9, 2018
1e4f8dd
Add Ruby to README.md
wesm Jun 9, 2018
6df28d3
ARROW-2691: [Rust] Update code formatting with latest Rust stable
andygrove Jun 9, 2018
dc80a76
ARROW-2695: [Python] Prevent calling scalar constructors directly
kszucs Jun 11, 2018
34890cc
ARROW-2689: [Python] Remove parameter timestamps_to_ms
xhochy Jun 11, 2018
9a76caa
ARROW-2692: [Python] Add test for writing dictionary encoded columns …
xhochy Jun 12, 2018
6b80fa8
ARROW-2693: [Python] pa.chunked_array causes a segmentation fault on …
xhochy Jun 12, 2018
53cc09e
ARROW-2626: [Python] Add column name to exception message when writin…
louispotok Jun 12, 2018
df44691
ARROW-2694 - [Python] ArrayValue string conversion returns the repres…
fjetter Jun 14, 2018
8d296cc
ARROW-2554: [Python] fix timestamp unit detection from python lists
crepererum Jun 14, 2018
2b00b8a
ARROW-2488: [C++] Add Boost 1.67 and 1.68 as recognized versions
wesm Jun 14, 2018
d4755e4
ARROW-2677: [Python] Expose Parquet ZSTD compression
xhochy Jun 14, 2018
acab16c
GetVisitor WIP
trxcllnt Jun 14, 2018
c15eee7
ARROW-2395: [Python] Fix flake8 warnings outside of pyarrow/ director…
wesm Jun 15, 2018
392fd02
ARROW-2696: [JAVA] enhance AllocationListener with an onFailedAllocat…
Jun 15, 2018
a82a027
ARROW-2721: [C++] Fix ORC and Protocol Buffers link error
kou Jun 19, 2018
e17f95d
ARROW-1780 - JDBC Adapter to convert Relational Data objects to Arrow…
atuldambalkar Jun 19, 2018
7c81bd6
ARROW-2723: [C++] Add .pc for arrow orc
shiro615 Jun 21, 2018
97c24d7
ARROW-2726: [C++] Fix the latest Boost version
kou Jun 21, 2018
884474c
ARROW-2699: [C++/Python] Add Table method that replaces a column with…
xhochy Jun 21, 2018
1442816
ARROW-2400: [C++] Fix Status destructor performance
pitrou Jun 23, 2018
87cb95c
ARROW-2716: [Python] Make manylinux1 base image independent of Python…
xhochy Jun 23, 2018
1ab2abc
ARROW-2729: [GLib] Add decimal128 array builder
shiro615 Jun 23, 2018
5cd1df6
ARROW-902: [C++] Script for downloading all thirdparty build dependen…
wesm Jun 23, 2018
df327f6
ARROW-2732: [GLib] Update brew packages for macOS
jemiam Jun 23, 2018
6ef9888
ARROW-2733: [GLib] Cast garrow_decimal128 to gint64
shiro615 Jun 23, 2018
58a2366
ARROW-2676: [Packaging] Deploy build artifacts to github releases
kszucs Jun 24, 2018
4db8a36
ARROW-2731 Add external Orc capability
msarahan Jun 25, 2018
ac0e521
ARROW-2725: [Java] make Accountant.AllocationOutcome publicly visible…
Jun 25, 2018
b165c86
ARROW-2727: [Java] Fix POM file issue causing build failure in java/a…
zjffdu Jun 26, 2018
27ddd3f
ARROW-2741: [Python] pa.array from np.datetime[D] and type=pa.date64 …
xhochy Jun 26, 2018
2484278
ARROW-2738: [GLib] Use Brewfile on installation process
jemiam Jun 26, 2018
e757e7e
ARROW-2739: [GLib] Use G_DECLARE_DERIVABLE_TYPE
shiro615 Jun 26, 2018
4481b07
ARROW-2608: [Java/Python] Add pyarrow.{Array,Field}.from_jvm / jvm_bu…
xhochy Jun 26, 2018
c73a88d
ARROW-2745: [C++] ORC ExternalProject needs to declare dependency on …
xhochy Jun 26, 2018
c4d1001
ARROW-2740: [Python] Add address property to Buffer
pitrou Jun 26, 2018
ae69a78
[JS] Add example to load data via fetch (#2125)
domoritz Jun 26, 2018
061aa5e
[JS] Add NPM shield, add yarn installation (#2124)
domoritz Jun 26, 2018
98253f3
[C++] Fix typo in README.md
rsabhi Jun 26, 2018
7e6dd60
ARROW-2662: [Python] Add to_pandas to ChunkedArray
xhochy Jun 26, 2018
8c9890c
ARROW-2742: [Python] Allow Table.from_batches to use iterator of reco…
BryanCutler Jun 26, 2018
9a9445a
ARROW-2748: [GLib] Add garrow_decimal_data_type_get_scale() (and _pre…
shiro615 Jun 27, 2018
66c017e
[GLib] Fix a typo
kou Jun 27, 2018
6a70680
ARROW-2753: [GLib] Add garrow_schema_*_field()
kou Jun 27, 2018
5e6c773
ARROW-2752: [GLib] Document garrow_decimal_data_type_new()
kou Jun 27, 2018
9979729
[Python] Fix typo in ipc.rst
richardjgowers Jun 27, 2018
32de512
ARROW-2751: [GLib] Add garrow_table_replace_column()
kou Jun 27, 2018
4a20fc7
ARROW-2683: [Python] Resource Warning (Unclosed File) when using pyar…
kszucs Jun 27, 2018
d38e862
ARROW-2758: [Plasma] Use Scope enum in Plasma
Jun 27, 2018
cac2c07
ARROW-2749: [GLib] Rename *garrow_decimal128_array_get_value to *garr…
shiro615 Jun 28, 2018
6cf5809
ARROW-2755: [Python] Allow using Ninja to build extension
kszucs Jun 28, 2018
c9ce215
ARROW-2747: [Python] Fix huge pages Plasma test
pcmoritz Jun 29, 2018
c56d881
ARROW-2761: [Python] Add support for set operations in hive partition…
Jun 30, 2018
88f6794
ARROW-2763: [Python] Make _metadata file accessible in ParquetDataset
Jun 30, 2018
4b2e52c
ARROW-2335: [Go] move README one directory higher
sbinet Jun 30, 2018
3425104
Update dependencies
trxcllnt Jul 2, 2018
f483567
Merge branch 'js-stream-reader-fixes' of github.com:trxcllnt/arrow in…
trxcllnt Jul 2, 2018
ab02407
silence node's experimental async-iterator warnings
trxcllnt Jul 2, 2018
2c2e694
emit native Buffers in node, else Uint8Arrays
trxcllnt Jul 2, 2018
d229a55
Merge branch 'master' into js-stream-reader-fixes
trxcllnt Jul 2, 2018
c9613cc
ARROW-2781: [Python] Download boost using curl in manylinux1 image
xhochy Jul 2, 2018
35b1f9a
[Python] MNT: Switch manylinux1 image back to latest tag (#2204)
xhochy Jul 2, 2018
ceae13d
ARROW-2344: [Go] Run Go unit tests in Travis CI
sbinet Jul 2, 2018
18f31e2
ARROW-2782: [Plasma] xfail plasma hugepage test
pcmoritz Jul 2, 2018
7deb141
ARROW-1954: [Python] Add metadata accessor to pyarrow.Field
kszucs Jul 3, 2018
42c2101
ARROW-2780: [Go] Run code coverage analysis
sbinet Jul 3, 2018
782c5a5
ARROW-2773: [Python] corrected partition_cols parameter name
danielchalef Jul 3, 2018
c8d97fa
ARROW-2650: [JS] Implement Unions
trxcllnt Jul 4, 2018
e7aaf7b
ARROW-2657: [Python] Import TensorFlow python extension before pyarro…
pcmoritz Jul 4, 2018
037c156
ARROW-2794: [Plasma] Add the RPC of a list of Delete Objects in Plasma
Jul 4, 2018
0175167
ARROW-2795: [Python] Run TensorFlow import workaround only on Linux p…
wesm Jul 5, 2018
d5d39f7
ARROW-2779: [JS] stream reader fixes
trxcllnt Jul 5, 2018
dfad902
Data refactor WIP
trxcllnt Jul 6, 2018
9d1432e
ARROW-2798: [Plasma] Use hashing function that takes into account all…
songqing Jul 6, 2018
148c05c
ARROW-2754: [Python] Change Python setup.py to make release builds by…
wesm Jul 6, 2018
010c874
ARROW-2805: [Python] Use official way to find TensorFlow module
pcmoritz Jul 7, 2018
836afac
ARROW-2634: [Go] Add Go license details to LICENSE.txt
wesm Jul 7, 2018
d7ad70d
ARROW-2802: [Docs] Move all release management instructions to Conflu…
wesm Jul 7, 2018
0933b4f
ARROW-2601: [Python] Prevent user from calling *MemoryPool constructo…
wesm Jul 8, 2018
528cb7d
ARROW-1454: [Python] Also match ArrowNotImplementedError in unsupport…
wesm Jul 8, 2018
735e38b
ARROW-2784: [C++] MemoryMappedFile::WriteAt allow writing past the end
Jul 8, 2018
ae5e4b8
ARROW-2809: [C++] Only print cpplint and clang-format output for fail…
wesm Jul 8, 2018
fbfddfd
ARROW-1722: [C++] Add linting script to find C++/CLI incompatibilities
wesm Jul 9, 2018
ddcb8f5
ARROW-2789: [JS] Add iterator to DataFrame
TheNeuralBit Jul 9, 2018
1a419fe
ARROW-2790: [C++] Buffers can contain uninitialized memory
Jul 9, 2018
8cfa51a
ARROW-2790: [C++] Minor style changes from the review
Jul 9, 2018
b926574
ARROW-2722: [Python] Sanitize dtype number to handle edge cases
pitrou Jul 9, 2018
fa08ddf
[CI] Add missing quote for omittable variable (#2235)
kou Jul 10, 2018
e515ba3
ARROW-2823: [C++] Search for flatbuffers in <root>/lib64
xhochy Jul 10, 2018
d130a00
ARROW-2812: [Ruby] Fix Arrow::Array#[] interface for Arrow::StructArray
joker1007 Jul 10, 2018
a096eb1
ARROW-2821: [C++] Remove redundant memsets in BooleanBuilder
wesm Jul 10, 2018
bd1c332
ARROW-2724: [Packaging] Determine whether all the expected artifacts …
kszucs Jul 10, 2018
bce0ca4
[Packaging] Fix fallback value code in Ruby
kou Jul 11, 2018
113430a
ARROW-2383: [deb] Use system Protocol Buffers
kou Jul 11, 2018
7a413fe
ARROW-2827: [C++] Stop to use -jN in sub make
kou Jul 11, 2018
8ff5db1
Merge branch 'master' into js-data-refactor
trxcllnt Jul 11, 2018
66d545a
Merge branch 'master' into js-operator-visitor
trxcllnt Jul 11, 2018
02c372f
Merge branch 'js-operator-visitor' into js-data-refactor
trxcllnt Jul 11, 2018
dacddbc
public data.length should be logical length (original logical length …
trxcllnt Jul 12, 2018
0eb0f18
refactor visitors, add get and iterator operator visitor stubs
trxcllnt Jul 12, 2018
78d5c8a
slim down vector classes, bind operator visitor methods to the protot…
trxcllnt Jul 12, 2018
e6b8bd1
refactor Types for new visitors, assign the dictionary Vector to the …
trxcllnt Jul 12, 2018
e7326ed
make stride a property on the vector
trxcllnt Jul 16, 2018
bd4c870
flesh out the Type enum, add mappings between Type enum and correspon…
trxcllnt Jul 16, 2018
6be0466
add visitor methods for each DataType, so the generic types flow all …
trxcllnt Jul 16, 2018
8c81ae0
add all the specific Vector subclasses for each DataType, add mapping…
trxcllnt Jul 16, 2018
f132656
add a Visitor to return the DataType constructor for each Type
trxcllnt Jul 16, 2018
d20bcd1
add a Visitor to return the Vector constructor for each Type
trxcllnt Jul 16, 2018
e645572
update the GetVisitor to use the new specific Visitor methods
trxcllnt Jul 16, 2018
a938132
(wip) update IndexOfVisitor
trxcllnt Jul 16, 2018
847695b
WIP
trxcllnt Aug 28, 2018
aec3b30
WIP
trxcllnt Aug 28, 2018
4b259a8
remove dead code before @cwharris digs in
trxcllnt Oct 12, 2018
c198636
Merge branch 'js-data-refactor' of github.com:trxcllnt/arrow into js-…
trxcllnt Oct 12, 2018
49e1959
.
Oct 12, 2018
eb1a809
make it run
trxcllnt Oct 12, 2018
12ca5e5
make test sourcemaps work again
trxcllnt Oct 12, 2018
9a1a66a
fix inference order for ints and floats
trxcllnt Oct 13, 2018
f1c80f9
.
Oct 13, 2018
831ce85
fix the call to the GetVectorConstructor visitor to use the type isnt…
trxcllnt Oct 15, 2018
70c1d5d
update to typescript 3.1.1, clean up visitor type signatures
trxcllnt Oct 15, 2018
b12ad82
a few fixes for ts 3.1.1
trxcllnt Oct 15, 2018
b216887
don't automatically bind the visitFn to the visitor, since presently …
trxcllnt Oct 17, 2018
60549b6
.
Oct 17, 2018
53d6cf0
fix(add new enums to minification exports)
Oct 19, 2018
d3cb781
add a few tests for message reader, work-in-progress recordbatch stre…
trxcllnt Oct 25, 2018
9f00326
Merge branch 'js-data-refactor' of github.com:trxcllnt/arrow into js-…
trxcllnt Oct 25, 2018
e3c694b
fix fromReadableNodeStream wrapper fn definition, type signature of r…
trxcllnt Oct 25, 2018
03c9931
add tests for AsyncMessageReader and reading node + DOM streams
trxcllnt Oct 25, 2018
9cd278b
fix lint
trxcllnt Oct 29, 2018
4143005
make DataType default to NONE
trxcllnt Oct 29, 2018
95ea13f
store dictionary types in schema dictionaries map
trxcllnt Oct 29, 2018
3d8c6fe
move stream adapters into io, split into separate files
trxcllnt Oct 29, 2018
d9a4432
collapse contiguous typed arrays that share the same ArrayBuffer
trxcllnt Nov 2, 2018
936096f
split the stream adapters into separate files
trxcllnt Nov 2, 2018
1d81a97
add initial RandomAccessFile implementations
trxcllnt Nov 2, 2018
d0bbe6f
Add initial ByteStream implementations
trxcllnt Nov 2, 2018
9f9d22e
Add compatability primitive for inferring the correct Arrow readers f…
trxcllnt Nov 2, 2018
06bf8b0
Update the message readers to use the new stream primitives
trxcllnt Nov 2, 2018
477e8eb
Add Reader implementations built on the new file and stream primitives
trxcllnt Nov 2, 2018
532259e
Split the Arrow IPC logic out into more readable and reusable bits
trxcllnt Nov 2, 2018
c71d716
Make DictionaryVector accept its dictionary as a constructor arg for now
trxcllnt Nov 2, 2018
0c63b9c
add some initial tests for the message and recordbatch stream readers
trxcllnt Nov 2, 2018
7b8d6d6
fix typo: add missing continue statements
trxcllnt Nov 2, 2018
5f931fb
separate datasource tests from recordbatch stream reader tests and ad…
trxcllnt Nov 3, 2018
6126e31
general cleanup, remove dead code/comments, add Vector/Column concat
trxcllnt Nov 3, 2018
ff43bd0
refactor recordbatch stream reader test a bit
trxcllnt Nov 4, 2018
e854f22
add generics to the recordbatch readers, update the tests to reflect …
trxcllnt Nov 4, 2018
55fa383
fix the recordbatch file reader
trxcllnt Nov 4, 2018
902dd0b
add nodeToDOMStream utility function that copies each input chunk
trxcllnt Nov 4, 2018
49e463b
a bit of cleanup, and add a tests for the readablestream default reader
trxcllnt Nov 4, 2018
6c4367b
Merge branch 'master' into js-data-refactor-master-merge
trxcllnt Nov 4, 2018
a88c2bb
cleanup post merge
trxcllnt Nov 4, 2018
0a7aeb5
more cleanup, move vector data loader out of the ipc folder, re-enabl…
trxcllnt Nov 4, 2018
51c098b
refactor a bit so I can do the JSON reader. make DictionaryVector poi…
trxcllnt Nov 5, 2018
cfd9b6b
initial implementation of DictionaryEncodeBinaryBuilder.
jlubea Nov 5, 2018
62759a6
rename private variables
trxcllnt Nov 7, 2018
3fcb93c
implement nullable get and indexOf
trxcllnt Nov 12, 2018
6725634
fix lint
trxcllnt Nov 12, 2018
50e0dbc
add json input WIP
trxcllnt Nov 17, 2018
bdbc855
reimplement the RecordBatchJSONReader
trxcllnt Nov 18, 2018
10ba9df
add safety checks before decoding Utf8 values
trxcllnt Nov 19, 2018
ac0cd4a
read buffers out in the proper order
trxcllnt Nov 19, 2018
dfb43c2
make the dictionaries type map to store the Dictionary type, so we ha…
trxcllnt Nov 19, 2018
3b0f65f
add abstract Vector base class, then implement recordbatch, chunkedve…
trxcllnt Nov 19, 2018
983db56
autobind some visitor prototype method implementations (perf)
trxcllnt Nov 19, 2018
8ff9052
add FileHandle test, close all recordbatch iterators
trxcllnt Nov 21, 2018
3512e98
fix subarray args in vector loader
trxcllnt Dec 2, 2018
7aeed9f
move magic into ipc/message/support.ts
trxcllnt Dec 2, 2018
3495585
split out and move message readers into ipc/message
trxcllnt Dec 2, 2018
c370e45
fix io/interfaces typings
trxcllnt Dec 2, 2018
e28ad7a
move io base classes into io/interfaces
trxcllnt Dec 2, 2018
d285671
move abstract readers into ipc/readers
trxcllnt Dec 2, 2018
0f8d437
add recordbatch readable node and DOM stream tests
trxcllnt Dec 2, 2018
e5a6f81
factor out arrow data source, move logic into AbstractRecordBatchRead…
trxcllnt Dec 3, 2018
7c0745a
use vector type to bind prototype methods
trxcllnt Dec 3, 2018
a505013
move fs.ReadStream workaround to readMessageBody impl, point to issue…
trxcllnt Dec 3, 2018
e5cd619
update esm dev dependency
trxcllnt Dec 5, 2018
6973289
cleanup io primitives and readers
trxcllnt Dec 5, 2018
0254e5c
peek at the first 8 bytes to ensure subsequent reads are aligned in n…
trxcllnt Dec 6, 2018
da469f4
rename ByteStream and AsyncByteStream to ByteSource and AsyncByteSource
trxcllnt Dec 6, 2018
dbac361
update dependencies
trxcllnt Dec 7, 2018
e434b34
clean up io, add writable stream bases
trxcllnt Dec 7, 2018
eb01385
misc cleanup
trxcllnt Dec 7, 2018
1148847
further cleanup of the readers, does composition a bit more like in C++
trxcllnt Dec 7, 2018
49ddfca
remove unused file
trxcllnt Dec 7, 2018
107ff4f
add more jest matchers + jest matcher type definitions
trxcllnt Dec 8, 2018
1c7b643
change RBR.open to RBR.from
trxcllnt Dec 8, 2018
f0b5c4c
add the vectorassembler back in for the writer
trxcllnt Dec 8, 2018
4702f2c
make the iterator visitor actually return an iterator
trxcllnt Dec 8, 2018
4690908
fix lint
trxcllnt Dec 8, 2018
dede789
rename base ReaderImpl class
trxcllnt Dec 8, 2018
f0be178
refactor helper methods into util
trxcllnt Dec 9, 2018
feb1924
add createDOMTransformStream helper fn
trxcllnt Dec 9, 2018
653ec16
add package-lock.json
trxcllnt Dec 9, 2018
5a76876
refactor the stream conversion code into dom and node entry points so…
trxcllnt Dec 9, 2018
288995e
rename the Row type to RowLike
trxcllnt Dec 9, 2018
0fd8faf
cleanup
trxcllnt Dec 9, 2018
279321a
also listen for close event in case the source readable doesn't call …
trxcllnt Dec 9, 2018
6c2d2f4
add babelrc file so jest can require() ESModules from node_modules
trxcllnt Dec 9, 2018
532a2b6
add concat dom stream test for reading multiple tables from the same …
trxcllnt Dec 9, 2018
a0d3b06
add support for creating a recordbatchreader from a fetch response
trxcllnt Dec 9, 2018
be062d6
use nodeToDOMStream that copies instead of nodeToWeb which doesn't
trxcllnt Dec 9, 2018
18e053d
cleanup stream interface imports
trxcllnt Dec 10, 2018
503aa39
initial stream writer commit
trxcllnt Dec 10, 2018
d02ff03
Merge branch 'js-data-refactor' into js-data-refactor-builders
jlubea Dec 10, 2018
7dd0f15
New builders (need testing).
jlubea Dec 10, 2018
551b5ac
rename the through stream factory methods, add through methods for th…
trxcllnt Dec 10, 2018
e4a9b83
Merge branch 'js-data-refactor' of https://github.com/trxcllnt/arrow …
jlubea Dec 10, 2018
bbba4fa
update ts-jest config to remove babel and make debugging async code e…
trxcllnt Dec 12, 2018
bb1d449
refactor io primitives to more closely mimic whatwg APIs, add RecordB…
trxcllnt Dec 12, 2018
ab335e7
fix typo
trxcllnt Dec 12, 2018
3bcadb3
make names consistent
trxcllnt Dec 12, 2018
36dd0a7
ensure generateDictionaryMap finds nested dictionary-encoded types
trxcllnt Dec 12, 2018
abfb026
remove unnecessary check (done somewhere else)
trxcllnt Dec 12, 2018
fbc184d
add sync flag to get get a buffer of the current state from an AsyncB…
trxcllnt Dec 12, 2018
7ef4e4d
add toEqualTable custom jest matcher
trxcllnt Dec 12, 2018
16d4031
reenable Table.serialize()
trxcllnt Dec 12, 2018
df2ec24
update integration/validate-tests.ts
trxcllnt Dec 12, 2018
8ee0b4e
fix for reading from misbehaving node streams
trxcllnt Dec 13, 2018
4b3bbb9
fix reading dictionary types in a way tha tsupports streaming
trxcllnt Dec 13, 2018
a354404
add back in the vector strides based on the type information
trxcllnt Dec 13, 2018
3e156b9
update integration tests and runners to pass in arrow json, file, and…
trxcllnt Dec 13, 2018
070ec98
ensure duplicate column names don't throw errors during row proxy con…
trxcllnt Dec 13, 2018
cea9e45
Add an official JS vscode launch config, since it's the only good edi…
trxcllnt Dec 13, 2018
64f65ee
move ipc reader and writer tests into their own folders
trxcllnt Dec 13, 2018
da8a569
rename reader/writer tests, remove unused io class, make writer mimic…
trxcllnt Dec 13, 2018
717d316
update integration bin helpers
trxcllnt Dec 13, 2018
192d834
tweak reader not to try/catch schema if autoClose is true
trxcllnt Dec 13, 2018
016b838
cleanup
trxcllnt Dec 13, 2018
e3444bd
Merge branch 'master' into js-data-refactor-rebased
trxcllnt Dec 13, 2018
ff1076c
update multi recordbatch table fn to use the same schema across batches
trxcllnt Dec 13, 2018
798931e
add a helpful comment
trxcllnt Dec 13, 2018
73fe17a
clean up, remove dead code
trxcllnt Dec 13, 2018
83e5d98
Merge branch 'js-data-refactor' of https://github.com/trxcllnt/arrow …
jlubea Dec 13, 2018
a73152f
intermediate commit for unit tests for builders
jlubea Dec 13, 2018
e2e9ec2
add vector.set back in, reenable most of the tests, minor fixes and c…
trxcllnt Dec 13, 2018
34778c5
Merge branch 'js-data-refactor' of https://github.com/trxcllnt/arrow …
jlubea Dec 13, 2018
d78e778
DictionaryEncodeBinaryBuilder unit test 'should encode an array as bi…
jlubea Dec 13, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion js/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ npm-debug.log*
yarn-debug.log*
yarn-error.log*

.vscode
.vscode/**
!.vscode/launch.json

# Runtime data
pids
Expand Down
155 changes: 155 additions & 0 deletions js/.vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "launch",
"name": "Debug Gulp Build",
"program": "${workspaceFolder}/node_modules/gulp/bin/gulp.js",
"args": [
"build",
// Specify we want to debug the "src" target, which won't clean or build -- essentially a "dry-run" of the gulp build
"--target", "src"
]
},
{
"type": "node",
"request": "launch",
"name": "Debug Unit Tests",
"cwd": "${workspaceRoot}",
"program": "${workspaceFolder}/node_modules/.bin/jest",
"skipFiles": [
"<node_internals>/**/*.js",
"${workspaceFolder}/node_modules/**/*.js"
],
"env": {
"NODE_NO_WARNINGS": "1",
"READABLE_STREAM": "disable",
"TEST_DOM_STREAMS": "true",
"TEST_NODE_STREAMS": "true",
"TEST_FILE_NAMES": "primitive-empty primitive simple datetime decimal dictionary nested struct_example",
// Modify these environment variables to run tests on a specific compilation target + module format combo
"TEST_TS_SOURCE": "true",
// "TEST_TS_SOURCE": "false",
// "TEST_TARGET": "es5",
// "TEST_MODULE": "cjs"
},
"args": [
// "-i",
"test/unit/",
"test/integration/",

// Uncomment any of these to run individual test suites
// "test/unit/int-tests.ts",
// "test/unit/table-tests.ts",

// "test/unit/vector/vector-tests.ts",
// "test/unit/vector/bool-vector-tests.ts",
// "test/unit/vector/date-vector-tests.ts",
// "test/unit/vector/float16-vector-tests.ts",
// "test/unit/vector/numeric-vector-tests.ts",

// "test/unit/ipc/message-reader-tests.ts",
// "test/unit/ipc/reader/file-reader-tests.ts",
// "test/unit/ipc/reader/json-reader-tests.ts",
// "test/unit/ipc/reader/from-inference-tests.ts",
// "test/unit/ipc/reader/stream-reader-tests.ts",
// "test/unit/ipc/reader/streams-dom-tests.ts",
// "test/unit/ipc/reader/streams-node-tests.ts",
// "test/unit/ipc/writer/writer-tests.ts",
// "test/unit/ipc/writer/streams-dom-tests.ts",
// "test/unit/ipc/writer/streams-node-tests.ts",
]
},
{
"type": "node",
"request": "launch",
"name": "Debug Integration Tests",
"cwd": "${workspaceRoot}",
"program": "${workspaceFolder}/node_modules/.bin/jest",
"skipFiles": [
"<node_internals>/**/*.js",
"${workspaceFolder}/node_modules/**/*.js"
],
"env": {
"TEST_TS_SOURCE": "true",
"NODE_NO_WARNINGS": "1",
"READABLE_STREAM": "disable",
"TEST_DOM_STREAMS": "true",
"TEST_NODE_STREAMS": "true",
"TEST_FILE_NAMES": "primitive-empty primitive simple datetime decimal dictionary nested struct_example"
},
"args": [
// "-i",
"test/integration/*"
]
},
{
"type": "node",
"request": "launch",
"name": "Debug bin/file-to-stream",
"env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" },
"runtimeArgs": ["-r", "ts-node/register"],
"skipFiles": [
"<node_internals>/**/*.js",
"${workspaceFolder}/node_modules/**/*.js"
],
"args": [
"${workspaceFolder}/bin/file-to-stream.js",
"./test/data/cpp/file/struct_example.arrow",
"./struct_example-stream-out.arrow",
]
},
{
"type": "node",
"request": "launch",
"name": "Debug bin/stream-to-file",
"env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" },
"runtimeArgs": ["-r", "ts-node/register"],
"skipFiles": [
"<node_internals>/**/*.js",
"${workspaceFolder}/node_modules/**/*.js"
],
"args": [
"${workspaceFolder}/bin/stream-to-file.js",
"./test/data/cpp/stream/struct_example.arrow",
"./struct_example-file-out.arrow",
]
},
{
"type": "node",
"request": "launch",
"name": "Debug bin/json-to-arrow",
"env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" },
"runtimeArgs": ["-r", "ts-node/register"],
"skipFiles": [
"<node_internals>/**/*.js",
"${workspaceFolder}/node_modules/**/*.js"
],
"args": [
"${workspaceFolder}/bin/json-to-arrow.js",
"-j", "./test/data/json/struct_example.json",
"-a", "./struct_example-stream-out.arrow",
"-f", "stream"
]
},
{
"type": "node",
"request": "launch",
"name": "Debug bin/print-buffer-alignment",
"env": { "ARROW_JS_DEBUG": "src", "TS_NODE_CACHE": "false" },
"runtimeArgs": ["-r", "ts-node/register"],
"skipFiles": [
"<node_internals>/**/*.js",
"${workspaceFolder}/node_modules/**/*.js"
],
"args": [
"${workspaceFolder}/bin/print-buffer-alignment.js",
"./test/data/cpp/stream/struct_example.arrow"
]
}
]
}
27 changes: 15 additions & 12 deletions js/bin/file-to-stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,24 @@
// specific language governing permissions and limitations
// under the License.

// @ts-check

const fs = require('fs');
const path = require('path');

const encoding = 'binary';
const ext = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : '';
const { util: { PipeIterator } } = require(`../index${ext}`);
const { Table, serializeStream, fromReadableStream } = require(`../index${ext}`);
const eos = require('util').promisify(require('stream').finished);
const extension = process.env.ARROW_JS_DEBUG === 'src' ? '.ts' : '';
const { RecordBatchReader, RecordBatchStreamWriter } = require(`../index${extension}`);

(async () => {
// Todo (ptaylor): implement `serializeStreamAsync` that accepts an
// AsyncIterable<Buffer>, rather than aggregating into a Table first
const in_ = process.argv.length < 3
? process.stdin : fs.createReadStream(path.resolve(process.argv[2]));
const out = process.argv.length < 4
? process.stdout : fs.createWriteStream(path.resolve(process.argv[3]));
new PipeIterator(serializeStream(await Table.fromAsync(fromReadableStream(in_))), encoding).pipe(out);

const readable = process.argv.length < 3 ? process.stdin : fs.createReadStream(path.resolve(process.argv[2]));
const writable = process.argv.length < 4 ? process.stdout : fs.createWriteStream(path.resolve(process.argv[3]));

const fileToStream = readable
.pipe(RecordBatchReader.throughNode())
.pipe(RecordBatchStreamWriter.throughNode())
.pipe(writable);

await eos(fileToStream);

})().catch((e) => { console.error(e); process.exit(1); });
124 changes: 80 additions & 44 deletions js/bin/integration.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,60 +17,92 @@
// specific language governing permissions and limitations
// under the License.

// @ts-check

const fs = require('fs');
const glob = require('glob');
const path = require('path');
const { promisify } = require('util');
const glob = promisify(require('glob'));
const { zip } = require('ix/iterable/zip');
const child_process = require(`child_process`);
const asyncDone = promisify(require('async-done'));
const argv = require(`command-line-args`)(cliOpts(), { partial: true });
const gulpPath = require.resolve(path.join(`..`, `node_modules/gulp/bin/gulp.js`));

let jsonPaths = [...(argv.json || [])];
let arrowPaths = [...(argv.arrow || [])];

if (!argv.mode) {
return print_usage();
/**
 * Resolve to `true` when `fs.promises.stat` succeeds for the given path,
 * and to `false` when it rejects for any reason.
 *
 * @param {string} p - Path to probe.
 * @returns {Promise<boolean>}
 */
const exists = async (p) => {
    try {
        await fs.promises.stat(p);
        return true;
    } catch (e) {
        // Any stat failure is treated as "not there".
        return false;
    }
};

let mode = argv.mode.toUpperCase();
if (mode === 'VALIDATE' && !jsonPaths.length) {
jsonPaths = glob.sync(path.resolve(__dirname, `../test/data/json/`, `*.json`));
if (!arrowPaths.length) {
[jsonPaths, arrowPaths] = jsonPaths.reduce(([jsonPaths, arrowPaths], jsonPath) => {
const { name } = path.parse(jsonPath);
for (const source of ['cpp', 'java']) {
for (const format of ['file', 'stream']) {
const arrowPath = path.resolve(__dirname, `../test/data/${source}/${format}/${name}.arrow`);
if (fs.existsSync(arrowPath)) {
jsonPaths.push(jsonPath);
arrowPaths.push(arrowPath);
}
}
}
return [jsonPaths, arrowPaths];
}, [[], []]);
console.log(`jsonPaths: [\n\t${jsonPaths.join('\n\t')}\n]`);
console.log(`arrowPaths: [\n\t${arrowPaths.join('\n\t')}\n]`);
(async () => {

if (!argv.mode) {
return print_usage();
}

let mode = argv.mode.toUpperCase();
let jsonPaths = [...(argv.json || [])];
let filePaths = [...(argv.file || [])];
let streamPaths = [...(argv.stream || [])];

// VALIDATE mode with no explicit JSON inputs: discover the bundled test data.
// Every `*.json` under test/data/json is globbed, then each call to
// loadJSONAndArrowPaths keeps only the JSON paths that have a matching
// `.arrow` file for the given source/format and returns the matching Arrow
// paths alongside them.
if (mode === 'VALIDATE' && !jsonPaths.length) {
    jsonPaths = await glob(path.resolve(__dirname, `../test/data/json/`, `*.json`));
    if (!filePaths.length) {
        [jsonPaths, filePaths] = await loadJSONAndArrowPaths(jsonPaths, 'cpp', 'file');
        [jsonPaths, filePaths] = await loadJSONAndArrowPaths(jsonPaths, 'java', 'file');
    }
    if (!streamPaths.length) {
        // Stream lookups must land in `streamPaths`. Assigning them to
        // `filePaths` left `streamPaths` empty and overwrote the file paths
        // found above, so the three-way zip below had an empty input.
        [jsonPaths, streamPaths] = await loadJSONAndArrowPaths(jsonPaths, 'cpp', 'stream');
        [jsonPaths, streamPaths] = await loadJSONAndArrowPaths(jsonPaths, 'java', 'stream');
    }
    // NOTE(review): each lookup may shrink `jsonPaths`, and the 'java' result
    // replaces the 'cpp' one; if the data directories ever differ in content,
    // the three lists can fall out of alignment -- confirm this is intended.
    for (const [jsonPath, filePath, streamPath] of zip(jsonPaths, filePaths, streamPaths)) {
        console.log(`jsonPath: ${jsonPath}`);
        console.log(`filePath: ${filePath}`);
        console.log(`streamPath: ${streamPath}`);
    }
}
} else if (!jsonPaths.length) {
return print_usage();
}

switch (mode) {
case 'VALIDATE':
const args = [`test`, `-i`].concat(argv._unknown || []);
jsonPaths.forEach((p, i) => {
args.push('-j', p, '-a', arrowPaths[i]);
});
process.exitCode = child_process.spawnSync(
gulpPath, args,
{
if (!jsonPaths.length) {
return print_usage();
}

switch (mode) {
case 'VALIDATE':

const args = [`test`, `-i`].concat(argv._unknown || []);
const gulp = require.resolve(path.join(__dirname, `../node_modules/gulp/bin/gulp.js`));

for (let [jsonPath, filePath, streamPath] of zip(jsonPaths, filePaths, streamPaths)) {
args.push('-j', jsonPath, '-f', filePath, '-s', streamPath);
}

await asyncDone(() => child_process.spawn(gulp, args, {
cwd: path.resolve(__dirname, '..'),
stdio: ['ignore', 'inherit', 'inherit']
}
).status || process.exitCode || 0;
break;
default:
print_usage();
}));

break;
default:
return print_usage();
}
})()
.then((x) => +x || 0, (e) => {
e && process.stderr.write(`${e}`);
return process.exitCode || 1;
}).then((code) => process.exit(code));

/**
 * Pair each JSON path with the Arrow file of the same base name under
 * `../test/data/<source>/<format>/`, keeping only pairs whose Arrow file
 * is reported present by `exists`.
 *
 * @param {string[]} jsonPaths - Candidate JSON file paths.
 * @param {string} source - Directory segment for the producer (callers pass 'cpp' or 'java').
 * @param {string} format - Directory segment for the format (callers pass 'file' or 'stream').
 * @returns {Promise<[string[], string[]]>} Index-aligned `[jsonPaths, arrowPaths]`.
 */
async function loadJSONAndArrowPaths(jsonPaths, source, format) {
    const matchedJSON = [];
    const matchedArrow = [];
    for (const jsonPath of jsonPaths) {
        const baseName = path.parse(jsonPath).name;
        const candidate = path.resolve(__dirname, `../test/data/${source}/${format}/${baseName}.arrow`);
        const found = await exists(candidate);
        if (!found) {
            continue;
        }
        matchedJSON.push(jsonPath);
        matchedArrow.push(candidate);
    }
    return [matchedJSON, matchedArrow];
}

function cliOpts() {
Expand All @@ -80,6 +112,10 @@ function cliOpts() {
name: 'mode',
description: 'The integration test to run'
},
{ name: 'json', alias: 'j', type: String, multiple: true, defaultValue: [], description: 'The JSON file[s] to read/write' },
{ name: 'file', alias: 'f', type: String, multiple: true, defaultValue: [], description: 'The Arrow file[s] to read/write' },
{ name: 'stream', alias: 's', type: String, multiple: true, defaultValue: [], description: 'The Arrow stream[s] to read/write' },

{
type: String,
name: 'arrow', alias: 'a',
Expand Down Expand Up @@ -118,5 +154,5 @@ function print_usage() {
]
},
]));
process.exit(1);
return 1;
}
Loading