Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
6ddfaf8
narrow the FloatVector.from() return signatures
trxcllnt Feb 12, 2019
5dfc100
make the abstract Vector a type alias to trick TS into letting us ove…
trxcllnt Feb 12, 2019
97c349e
add nullable and metadata getters to the Column class
trxcllnt Feb 12, 2019
db39031
add public Field#clone impl for convenience
trxcllnt Feb 12, 2019
0407cd7
add Column.new() convenience method for creating Columns with string …
trxcllnt Feb 12, 2019
9d1f2ad
add Table.new() convenience method for creating Tables from Columns o…
trxcllnt Feb 12, 2019
bdb23b8
ensure uniform chunk lengths in RecordBatch.from()
trxcllnt Feb 12, 2019
933b531
Narrow the signature of Schema#fields to Field<T[keyof T]>[], cleanup
trxcllnt Feb 12, 2019
c9eeb05
fix lint
trxcllnt Feb 12, 2019
3f16c81
fix typo
trxcllnt Feb 12, 2019
8218f40
Ensure Chunked#slice() range end is correct when there's only a singl…
trxcllnt Feb 12, 2019
85d0e00
fix uniform chunk distribution when the new chunks are longer than th…
trxcllnt Feb 12, 2019
9d8f493
cleanup: use the specialized typed array casting functions
trxcllnt Feb 12, 2019
b2153aa
ensure the Vector map types always fall back to BaseVector
trxcllnt Feb 12, 2019
cf6f97a
add TypeComparator visitor so we can compare Schemas, Fields, and Dat…
trxcllnt Feb 13, 2019
0ac786c
add selectAt() method to Table, Schema, and RecordBatch for selecting…
trxcllnt Feb 13, 2019
99e8888
add Table and Schema assign() impls
trxcllnt Feb 13, 2019
e4a5d87
split out the generated data validators for reuse
trxcllnt Feb 13, 2019
07a2c96
add basic Table#assign tests
trxcllnt Feb 13, 2019
41aa902
Add more tests to ensure Tables can serialize through various slice, …
trxcllnt Feb 13, 2019
7a8daad
clean up/speed up: move common argument flattening methods into a uti…
trxcllnt Feb 13, 2019
a67bd56
clean up: eliminate more getters in favor of read-only properties
trxcllnt Feb 13, 2019
83de5ed
guard against out-of-bounds selections
trxcllnt Feb 13, 2019
73b8af7
fix Int64Vector typings
trxcllnt Feb 13, 2019
dc80143
remove Table.fromVectors in favor of Table.new
trxcllnt Feb 18, 2019
98c8e52
add initial RecordBatch.new and select tests
trxcllnt Feb 18, 2019
5b6d938
cleanup
trxcllnt Feb 18, 2019
bfbcc8b
cleanup/rename Table + Schema + RecordBatch from -> new, cleanup argu…
trxcllnt Feb 18, 2019
1758063
add Table.new docstring
trxcllnt Feb 20, 2019
4b8fb54
add a test for table and recordbatch with a single column
trxcllnt Feb 21, 2019
9943d9c
fix lint
trxcllnt Feb 21, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions js/.vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@
// "test/unit/table-tests.ts",
// "test/unit/generated-data-tests.ts",

// "test/unit/table/assign-tests.ts",
// "test/unit/table/serialize-tests.ts",
// "test/unit/recordbatch/record-batch-tests.ts",

// "test/unit/vector/vector-tests.ts",
// "test/unit/vector/bool-vector-tests.ts",
// "test/unit/vector/date-vector-tests.ts",
Expand Down
2 changes: 1 addition & 1 deletion js/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ const rainDates = Array.from(
{ length: LENGTH },
(_, i) => new Date(Date.now() - 1000 * 60 * 60 * 24 * i));

const rainfall = Table.fromVectors(
const rainfall = Table.new(
[FloatVector.from(rainAmounts), DateVector.from(rainDates)],
['precipitation', 'date']
);
Expand Down
28 changes: 26 additions & 2 deletions js/src/column.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
// specific language governing permissions and limitations
// under the License.

import { Data } from './data';
import { Field } from './schema';
import { Vector } from './vector';
import { DataType } from './type';
import { Vector } from './vector';
import { VectorCtorArgs, Vector as V } from './interfaces';
import { Clonable, Sliceable, Applicative } from './vector';
import { Chunked, SearchContinuation } from './vector/chunked';

Expand All @@ -34,8 +36,28 @@ export class Column<T extends DataType = any>
Sliceable<Column<T>>,
Applicative<T, Column<T>> {

public static new<T extends DataType>(field: string | Field<T>, ...chunks: (Vector<T> | Vector<T>[])[]): Column<T>;
public static new<T extends DataType>(field: string | Field<T>, data: Data<T>, ...args: VectorCtorArgs<V<T>>): Column<T>;
/**
 * Creates a Column from a name or Field plus either a list of Vector chunks
 * or a single Data (with any extra Vector constructor arguments).
 *
 * - If `data` is an array, its elements and `rest` are flattened into chunks.
 * - If `data` is a Vector, it and `rest` are flattened into chunks.
 * - Otherwise `data` is handed to `Vector.new(data, ...rest)` to build the single chunk.
 *
 * When `field` is a string, a new Field is created with that name, the type
 * of the first chunk, and `nullable` set to whether any chunk reports nulls.
 * When `field` is a non-nullable Field but some chunk reports nulls, a clone
 * of the Field with `nullable: true` is used instead.
 *
 * @param field Column name, or the Field describing the Column.
 * @param data A Data, a Vector, or an array of them.
 * @param rest Additional chunks, or additional Vector constructor arguments.
 * @returns A new Column over the flattened chunks.
 * @throws {TypeError} If `field` is a string and no chunks were supplied, since
 * there is nothing to infer the Column's type from.
 * @nocollapse
 */
public static new<T extends DataType = any>(field: string | Field<T>, data: Data<T> | Vector<T> | (Data<T> | Vector<T>)[], ...rest: any[]) {

    const chunks = Chunked.flatten<T>(
        Array.isArray(data) ? [...data, ...rest] :
        data instanceof Vector ? [data, ...rest] :
        [Vector.new(data, ...rest)]
    );

    // Evaluated lazily so chunks are only scanned when the answer is needed.
    const hasNulls = () => chunks.some(({ nullCount }) => nullCount > 0);

    if (typeof field === 'string') {
        // The type is inferred from the first chunk, so at least one must exist.
        if (chunks.length === 0) {
            throw new TypeError(`Column.new: cannot infer the type of column "${field}" without at least one chunk`);
        }
        const type = chunks[0].data.type;
        field = new Field(field, type, hasNulls());
    } else if (!field.nullable && hasNulls()) {
        // Widen the Field rather than carry null values under a non-nullable Field.
        field = field.clone({ nullable: true });
    }
    return new Column(field, chunks);
}

constructor(field: Field<T>, vectors: Vector<T>[] = [], offsets?: Uint32Array) {
vectors = Chunked.flatten(...vectors);
vectors = Chunked.flatten<T>(...vectors);
super(field.type, vectors, offsets);
this._field = field;
if (vectors.length === 1 && !(this instanceof SingleChunkColumn)) {
Expand All @@ -48,6 +70,8 @@ export class Column<T extends DataType = any>

/** The Field this Column was constructed with. */
public get field() {
    return this._field;
}
/** The `name` of this Column's Field. */
public get name() {
    const { name } = this._field;
    return name;
}
/** The `nullable` value of this Column's Field. */
public get nullable() {
    const { nullable } = this._field;
    return nullable;
}
/** The `metadata` value of this Column's Field. */
public get metadata() {
    const { metadata } = this._field;
    return metadata;
}

public clone(chunks = this._chunks) {
return new Column(this._field, chunks);
Expand Down
7 changes: 4 additions & 3 deletions js/src/compute/dataframe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,12 @@ export class DataFrame<T extends { [key: string]: DataType } = any> extends Tabl

export class CountByResult<T extends DataType = any, TCount extends Int = Int> extends Table<{ values: T, counts: TCount }> {
constructor(values: Vector<T>, counts: V<TCount>) {
const schema = new Schema<{ values: T, counts: TCount }>([
type R = { values: T, counts: TCount };
const schema = new Schema<R>([
new Field('values', values.type),
new Field('counts', counts.type)
]);
super(new RecordBatch(schema, counts.length, [values, counts]));
super(new RecordBatch<R>(schema, counts.length, [values, counts]));
}
public toJSON(): Object {
const values = this.getColumnAt(0)!;
Expand All @@ -100,7 +101,7 @@ export class CountByResult<T extends DataType = any, TCount extends Int = Int> e
}
}

export class FilteredDataFrame<T extends { [key: string]: DataType; } = any> extends DataFrame<T> {
export class FilteredDataFrame<T extends { [key: string]: DataType } = any> extends DataFrame<T> {
private _predicate: Predicate;
constructor (batches: RecordBatch<T>[], predicate: Predicate) {
super(batches);
Expand Down
56 changes: 36 additions & 20 deletions js/src/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@
// under the License.

import { Vector } from './vector';
import { truncateBitmap } from './util/bit';
import { popcnt_bit_range } from './util/bit';
import { toArrayBufferView } from './util/buffer';
import { DataType, SparseUnion, DenseUnion } from './type';
import { VectorType as BufferType, UnionMode, Type } from './enum';
import { toArrayBufferView, toUint8Array, toInt32Array } from './util/buffer';
import {
Dictionary,
Null, Int, Float,
Expand Down Expand Up @@ -139,6 +140,21 @@ export class Data<T extends DataType = DataType> {
(!childData.length || this.valueOffsets) ? childData : this._sliceChildren(childData, childStride * offset, childStride * length));
}

/**
 * Returns a clone of this Data with length `newLength`, offset 0, and a freshly
 * built validity bitmap in which the first `length` slots are marked valid
 * (or copied from the existing null bitmap) and every later slot is null.
 * The clone's null count is `nullCount + (newLength - length)`.
 *
 * NOTE(review): presumably only called with `newLength >= length` — confirm against callers.
 *
 * @param newLength The length passed to the clone.
 * @returns The result of `this.clone(...)` with the new bitmap in the VALIDITY buffer slot.
 */
public _changeLengthAndBackfillNullBitmap(newLength: number): Data<T> {
const { length, nullCount } = this;
// allocate a zero-filled (all null) bitmap sized to `newLength` rounded up to a multiple
// of 64 bits, then set the first `length >> 3` whole bytes to 0xFF (not null)
const bitmap = new Uint8Array(((newLength + 63) & ~63) >> 3).fill(255, 0, length >> 3);
// set the low `length % 8` bits of the next byte to 1 (not null); its higher bits stay 0
bitmap[length >> 3] = (1 << (length - (length & ~7))) - 1;
// if we have a nullBitmap, truncate + slice and set it over the pre-filled 1s
if (nullCount > 0) {
bitmap.set(truncateBitmap(this.offset, length, this.nullBitmap), 0);
}
// NOTE(review): if `this.buffers` hands back the instance's own array rather than a copy,
// the assignment below also changes this Data — confirm.
const buffers = this.buffers;
buffers[BufferType.VALIDITY] = bitmap;
return this.clone(this.type, 0, newLength, nullCount + (newLength - length), buffers);
}

protected _sliceBuffers(offset: number, length: number, stride: number, typeId: T['TType']): Buffers<T> {
let arr: any, { buffers } = this;
// If typeIds exist, slice the typeIds buffer
Expand All @@ -159,85 +175,85 @@ export class Data<T extends DataType = DataType> {
//
/**
 * Creates a `Data` for a `Null` type. The buffer list is
 * `[undefined, undefined, nullBitmap]`, with `nullBitmap` passed through
 * `toArrayBufferView(Uint8Array, ...)`.
 * @nocollapse
 */
public static Null<T extends Null>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer) {
return new Data(type, offset, length, nullCount, [undefined, undefined, toArrayBufferView(Uint8Array, nullBitmap)]);
// NOTE(review): unreachable — follows the return above; same call written with `toUint8Array`.
return new Data(type, offset, length, nullCount, [undefined, undefined, toUint8Array(nullBitmap)]);
}
/**
 * Creates a `Data` for an `Int` type. The buffer list is
 * `[undefined, data, nullBitmap]`, with `data` viewed as `type.ArrayType`
 * and `nullBitmap` viewed as a `Uint8Array`.
 * @nocollapse
 */
public static Int<T extends Int>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]);
// NOTE(review): unreachable — follows the return above; same call written with `toUint8Array`.
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
}
/**
 * Creates a `Data` for a `Dictionary` type. The buffer list is
 * `[undefined, data, nullBitmap]`, with `data` viewed as the array type of
 * the dictionary's `indices` type and `nullBitmap` viewed as a `Uint8Array`.
 * @nocollapse
 */
public static Dictionary<T extends Dictionary>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView<T['TArray']>(type.indices.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]);
// NOTE(review): unreachable — follows the return above; same call written with `toUint8Array`.
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView<T['TArray']>(type.indices.ArrayType, data), toUint8Array(nullBitmap)]);
}
/**
 * Creates a `Data` for a `Float` type. The buffer list is
 * `[undefined, data, nullBitmap]`, with `data` viewed as `type.ArrayType`
 * and `nullBitmap` viewed as a `Uint8Array`.
 * @nocollapse
 */
public static Float<T extends Float>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]);
// NOTE(review): unreachable — follows the return above; same call written with `toUint8Array`.
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
}
/**
 * Creates a `Data` for a `Bool` type. The buffer list is
 * `[undefined, data, nullBitmap]`, with `data` viewed as `type.ArrayType`
 * and `nullBitmap` viewed as a `Uint8Array`.
 * @nocollapse
 */
public static Bool<T extends Bool>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]);
// NOTE(review): unreachable — follows the return above; same call written with `toUint8Array`.
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
}
/**
 * Creates a `Data` for a `Decimal` type. The buffer list is
 * `[undefined, data, nullBitmap]`, with `data` viewed as `type.ArrayType`
 * and `nullBitmap` viewed as a `Uint8Array`.
 * @nocollapse
 */
public static Decimal<T extends Decimal>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]);
// NOTE(review): unreachable — follows the return above; same call written with `toUint8Array`.
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
}
/**
 * Creates a `Data` for a `Date_` type. The buffer list is
 * `[undefined, data, nullBitmap]`, with `data` viewed as `type.ArrayType`
 * and `nullBitmap` viewed as a `Uint8Array`.
 * @nocollapse
 */
public static Date<T extends Date_>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]);
// NOTE(review): unreachable — follows the return above; same call written with `toUint8Array`.
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
}
/**
 * Creates a `Data` for a `Time` type. The buffer list is
 * `[undefined, data, nullBitmap]`, with `data` viewed as `type.ArrayType`
 * and `nullBitmap` viewed as a `Uint8Array`.
 * @nocollapse
 */
public static Time<T extends Time>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]);
// NOTE(review): unreachable — follows the return above; same call written with `toUint8Array`.
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
}
/**
 * Creates a `Data` for a `Timestamp` type. The buffer list is
 * `[undefined, data, nullBitmap]`, with `data` viewed as `type.ArrayType`
 * and `nullBitmap` viewed as a `Uint8Array`.
 * @nocollapse
 */
public static Timestamp<T extends Timestamp>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]);
// NOTE(review): unreachable — follows the return above; same call written with `toUint8Array`.
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
}
/**
 * Creates a `Data` for an `Interval` type. The buffer list is
 * `[undefined, data, nullBitmap]`, with `data` viewed as `type.ArrayType`
 * and `nullBitmap` viewed as a `Uint8Array`.
 * @nocollapse
 */
public static Interval<T extends Interval>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]);
// NOTE(review): unreachable — follows the return above; same call written with `toUint8Array`.
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
}
/**
 * Creates a `Data` for a `FixedSizeBinary` type. The buffer list is
 * `[undefined, data, nullBitmap]`, with `data` viewed as `type.ArrayType`
 * and `nullBitmap` viewed as a `Uint8Array`.
 * @nocollapse
 */
public static FixedSizeBinary<T extends FixedSizeBinary>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toArrayBufferView(Uint8Array, nullBitmap)]);
// NOTE(review): unreachable — follows the return above; same call written with `toUint8Array`.
return new Data(type, offset, length, nullCount, [undefined, toArrayBufferView(type.ArrayType, data), toUint8Array(nullBitmap)]);
}
/**
 * Creates a `Data` for a `Binary` type. The buffer list is
 * `[valueOffsets, data, nullBitmap]`, with `valueOffsets` viewed as an
 * `Int32Array` and both `data` and `nullBitmap` viewed as `Uint8Array`s.
 * @nocollapse
 */
public static Binary<T extends Binary>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, data: Uint8Array) {
return new Data(type, offset, length, nullCount, [toArrayBufferView(Int32Array, valueOffsets), toArrayBufferView(Uint8Array, data), toArrayBufferView(Uint8Array, nullBitmap)]);
// NOTE(review): unreachable — follows the return above; same call written with `toInt32Array`/`toUint8Array`.
return new Data(type, offset, length, nullCount, [toInt32Array(valueOffsets), toUint8Array(data), toUint8Array(nullBitmap)]);
}
/**
 * Creates a `Data` for a `Utf8` type. The buffer list is
 * `[valueOffsets, data, nullBitmap]`, with `valueOffsets` viewed as an
 * `Int32Array` and both `data` and `nullBitmap` viewed as `Uint8Array`s.
 * @nocollapse
 */
public static Utf8<T extends Utf8>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, data: Uint8Array) {
return new Data(type, offset, length, nullCount, [toArrayBufferView(Int32Array, valueOffsets), toArrayBufferView(Uint8Array, data), toArrayBufferView(Uint8Array, nullBitmap)]);
// NOTE(review): unreachable — follows the return above; same call written with `toInt32Array`/`toUint8Array`.
return new Data(type, offset, length, nullCount, [toInt32Array(valueOffsets), toUint8Array(data), toUint8Array(nullBitmap)]);
}
/**
 * Creates a `Data` for a `List` type. The buffer list is
 * `[valueOffsets, undefined, nullBitmap]`, with `valueOffsets` viewed as an
 * `Int32Array` and `nullBitmap` viewed as a `Uint8Array`; `child` is passed
 * on as the single entry of the child list.
 * @nocollapse
 */
public static List<T extends List>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, child: Data<T['valueType']> | Vector<T['valueType']>) {
return new Data(type, offset, length, nullCount, [toArrayBufferView(Int32Array, valueOffsets), undefined, toArrayBufferView(Uint8Array, nullBitmap)], [child]);
// NOTE(review): unreachable — follows the return above; same call written with `toInt32Array`/`toUint8Array`.
return new Data(type, offset, length, nullCount, [toInt32Array(valueOffsets), undefined, toUint8Array(nullBitmap)], [child]);
}
/**
 * Creates a `Data` for a `FixedSizeList` type. The buffer list is
 * `[undefined, undefined, nullBitmap]`, with `nullBitmap` viewed as a
 * `Uint8Array`; `child` is passed on as the single entry of the child list.
 * @nocollapse
 */
public static FixedSizeList<T extends FixedSizeList>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, child: Data | Vector) {
return new Data(type, offset, length, nullCount, [undefined, undefined, toArrayBufferView(Uint8Array, nullBitmap)], [child]);
// NOTE(review): unreachable — follows the return above; same call written with `toUint8Array`.
return new Data(type, offset, length, nullCount, [undefined, undefined, toUint8Array(nullBitmap)], [child]);
}
/**
 * Creates a `Data` for a `Struct` type. The buffer list is
 * `[undefined, undefined, nullBitmap]`, with `nullBitmap` viewed as a
 * `Uint8Array`; `children` is passed on as the child list.
 * @nocollapse
 */
public static Struct<T extends Struct>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, children: (Data | Vector)[]) {
return new Data(type, offset, length, nullCount, [undefined, undefined, toArrayBufferView(Uint8Array, nullBitmap)], children);
// NOTE(review): unreachable — follows the return above; same call written with `toUint8Array`.
return new Data(type, offset, length, nullCount, [undefined, undefined, toUint8Array(nullBitmap)], children);
}
/**
 * Creates a `Data` for a `Map_` type. The buffer list is
 * `[undefined, undefined, nullBitmap]`, with `nullBitmap` viewed as a
 * `Uint8Array`; `children` is passed on as the child list.
 * @nocollapse
 */
public static Map<T extends Map_>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, children: (Data | Vector)[]) {
return new Data(type, offset, length, nullCount, [undefined, undefined, toArrayBufferView(Uint8Array, nullBitmap)], children);
// NOTE(review): unreachable — follows the return above; same call written with `toUint8Array`.
return new Data(type, offset, length, nullCount, [undefined, undefined, toUint8Array(nullBitmap)], children);
}
public static Union<T extends SparseUnion>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, children: (Data | Vector)[]): Data<T>;
public static Union<T extends DenseUnion>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsets: ValueOffsetsBuffer, children: (Data | Vector)[]): Data<T>;
/**
 * Creates a `Data` for a `Union` type.
 *
 * The sparse overload takes `(…, typeIds, children)`; the dense overload takes
 * `(…, typeIds, valueOffsets, children)`. When `type.mode` is
 * `UnionMode.Sparse`, the seventh argument is treated as the child list and
 * returned immediately; otherwise it is treated as the value offsets, stored
 * in the OFFSET buffer slot, and `children` is used as the child list.
 * @nocollapse
 */
public static Union<T extends Union>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsetsOrChildren: ValueOffsetsBuffer | (Data | Vector)[], children?: (Data | Vector)[]) {
const buffers = <unknown> [
undefined, undefined,
// NOTE(review): this entry and the next are BOTH elements of the array literal, so the
// typeIds entry below sits one index later than it would with only one of them — confirm.
toArrayBufferView(Uint8Array, nullBitmap),
toUint8Array(nullBitmap),
toArrayBufferView(type.ArrayType, typeIds)
] as Partial<Buffers<T>>;
if (type.mode === UnionMode.Sparse) {
// sparse: no offsets buffer; the seventh argument is the child list
return new Data(type, offset, length, nullCount, buffers, valueOffsetsOrChildren as (Data | Vector)[]);
}
// dense: the seventh argument is the value offsets.
// NOTE(review): the two assignments below target the same slot; the second overwrites the first.
buffers[BufferType.OFFSET] = toArrayBufferView(Int32Array, <ValueOffsetsBuffer> valueOffsetsOrChildren);
buffers[BufferType.OFFSET] = toInt32Array(<ValueOffsetsBuffer> valueOffsetsOrChildren);
return new Data(type, offset, length, nullCount, buffers, children);
}
}
Expand Down
6 changes: 3 additions & 3 deletions js/src/interfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ export type VectorCtorType<
export type Vector<T extends Type | DataType = any> =
T extends Type ? TypeToVector<T> :
T extends DataType ? DataTypeToVector<T> :
never
vecs.BaseVector<any>
;

/** @ignore */
Expand Down Expand Up @@ -186,13 +186,13 @@ type DataTypeToVector<T extends DataType = any> =
T extends type.Map_ ? vecs.MapVector<T['dataTypes']> :
T extends type.List ? vecs.ListVector<T['valueType']> :
T extends type.Struct ? vecs.StructVector<T['dataTypes']> :
T extends type.Dictionary ? vecs.DictionaryVector<T['valueType'], T['indices']> :
T extends type.Dictionary ? vecs.DictionaryVector<T['valueType'], T['indices']> :
T extends type.FixedSizeList ? vecs.FixedSizeListVector<T['valueType']> :
vecs.BaseVector<T>
;

/** @ignore */
type TypeToDataType<T extends Type> =
export type TypeToDataType<T extends Type> =
T extends Type.Null ? type.Null
: T extends Type.Bool ? type.Bool
: T extends Type.Int ? type.Int
Expand Down
2 changes: 1 addition & 1 deletion js/src/ipc/node/reader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.

import { Duplex, DuplexOptions } from 'stream';
import { DataType } from '../../type';
import { Duplex, DuplexOptions } from 'stream';
import { RecordBatch } from '../../recordbatch';
import { AsyncByteQueue } from '../../io/stream';
import { RecordBatchReader } from '../../ipc/reader';
Expand Down
2 changes: 1 addition & 1 deletion js/src/ipc/node/writer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.

import { Duplex, DuplexOptions } from 'stream';
import { DataType } from '../../type';
import { Duplex, DuplexOptions } from 'stream';
import { AsyncByteStream } from '../../io/stream';
import { RecordBatchWriter } from '../../ipc/writer';

Expand Down
2 changes: 1 addition & 1 deletion js/src/ipc/reader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.

import { DataType } from '../type';
import { Vector } from '../vector';
import { DataType } from '../type';
import { MessageHeader } from '../enum';
import { Footer } from './metadata/file';
import { Schema, Field } from '../schema';
Expand Down
2 changes: 1 addition & 1 deletion js/src/ipc/writer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ export class RecordBatchWriter<T extends { [key: string]: DataType } = any> exte
this._dictionaryBlocks = [];
this._recordBatchBlocks = [];

if (!schema || (schema !== this._schema)) {
if (!schema || !(schema.compareTo(this._schema))) {
if (schema === null) {
this._position = 0;
this._schema = null;
Expand Down
Loading