Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions js/perf/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ console.time('Prepare Data');

const LENGTH = 100000;
const NUM_BATCHES = 10;
const cities = ['Charlottesville', 'New York', 'San Francisco', 'Seattle', 'Terre Haute', 'Washington, DC'];

const values = Arrow.vectorFromArray(['Charlottesville', 'New York', 'San Francisco', 'Seattle', 'Terre Haute', 'Washington, DC']);
const values = Arrow.vectorFromArray(cities);

const batches = Array.from({ length: NUM_BATCHES }).map(() => {
const lat = Float32Array.from(
Expand Down Expand Up @@ -57,14 +58,35 @@ const batches = Array.from({ length: NUM_BATCHES }).map(() => {
});
});

/**
 * Benchmark inputs: one typed array per numeric element type, each with
 * `LENGTH` entries.
 *
 * The two 64-bit integer arrays are filled with the constant `42n`. Every
 * other array is filled with `Math.random() * 255`, which is converted to
 * the array's element type when it is stored.
 */
export const typedArrays = (() => {
    // Array-like descriptor shared by every `from` call; only `length` is read.
    const shape = { length: LENGTH };
    const randomValue = () => Math.random() * 255;
    const constantBigInt = () => 42n;

    return {
        // Unsigned integers.
        uint8Array: Uint8Array.from(shape, randomValue),
        uint16Array: Uint16Array.from(shape, randomValue),
        uint32Array: Uint32Array.from(shape, randomValue),
        uint64Array: BigUint64Array.from(shape, constantBigInt),

        // Signed integers.
        int8Array: Int8Array.from(shape, randomValue),
        int16Array: Int16Array.from(shape, randomValue),
        int32Array: Int32Array.from(shape, randomValue),
        int64Array: BigInt64Array.from(shape, constantBigInt),

        // Floating point.
        float32Array: Float32Array.from(shape, randomValue),
        float64Array: Float64Array.from(shape, randomValue)
    };
})();

/**
 * Benchmark inputs: plain JavaScript arrays of `LENGTH` entries each.
 *
 * - `numbers`: values produced by `Math.random() * 255`
 * - `booleans`: `true` when `Math.random()` exceeds 0.5
 * - `strings`: entries picked at a random index from `cities`
 */
export const arrays = (() => {
    // Array-like descriptor shared by every `Array.from` call below.
    const shape = { length: LENGTH };
    const pickCity = () => cities[Math.floor(Math.random() * cities.length)];

    return {
        numbers: Array.from(shape, () => Math.random() * 255),
        booleans: Array.from(shape, () => Math.random() > 0.5),
        strings: Array.from(shape, pickCity)
    };
})();

// Assemble every generated record batch into a single table, using the
// schema of the first batch as the table schema.
const tracks = new Arrow.Table(batches[0].schema, batches);

// Stop the 'Prepare Data' console timer now that the fixtures are built.
console.timeEnd('Prepare Data');

export default [
{
name: 'tracks',
df: tracks,
table: tracks,
ipc: Arrow.RecordBatchStreamWriter.writeAll(tracks).toUint8Array(true),
countBys: ['origin', 'destination'],
counts: [
Expand Down
104 changes: 30 additions & 74 deletions js/perf/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,12 @@

import * as Arrow from '../src/Arrow';

import config from './config';
import config, { arrays, typedArrays } from './config';
import b from 'benny';
import { CaseResult, Summary } from 'benny/lib/internal/common-types';
import kleur from 'kleur';

const { RecordBatchReader, RecordBatchStreamWriter } = Arrow;
// const { predicate } = Arrow;
// const { col } = Arrow.predicate;


const args = process.argv.slice(2);
Expand All @@ -55,7 +53,23 @@ function cycle(result: CaseResult, _summary: Summary) {
);
}

for (const { name, ipc, df } of config) {
// "Vector" suite: measures how long it takes to build a vector from each
// prepared input. Typed arrays go through `Arrow.makeVector`; plain
// JavaScript arrays go through `Arrow.vectorFromArray`.
{
    const caseLabel = (source: string) => `make vector from ${source}`;

    // One benchmark case per typed array, in declaration order.
    const typedArrayCases = Object.entries(typedArrays).map(([key, data]) =>
        b.add(caseLabel(key), () => {
            Arrow.makeVector(data);
        }));

    // One benchmark case per plain array, in declaration order.
    const plainArrayCases = Object.entries(arrays).map(([key, data]) =>
        b.add(caseLabel(key), () => {
            Arrow.vectorFromArray(data as any);
        }));

    b.suite(
        `Vector`,
        ...typedArrayCases,
        ...plainArrayCases,
        b.cycle(cycle)
    );
}

for (const { name, ipc, table } of config) {
b.suite(
`Parse`,

Expand All @@ -64,13 +78,13 @@ for (const { name, ipc, df } of config) {
}),

b.add(`dataset: ${name}, function: write recordBatches`, () => {
RecordBatchStreamWriter.writeAll(df).toUint8Array(true);
RecordBatchStreamWriter.writeAll(table).toUint8Array(true);
}),

b.cycle(cycle)
);

const schema = df.schema;
const schema = table.schema;

const suites = [{
suite_name: `Get values by index`,
Expand All @@ -95,7 +109,7 @@ for (const { name, ipc, df } of config) {
suite_name,

...schema.fields.map((f, i) => {
const vector = df.getChildAt(i)!;
const vector = table.getChildAt(i)!;
return b.add(`dataset: ${name}, column: ${f.name}, length: ${formatNumber(vector.length)}, type: ${vector.type}`, () => {
fn(vector);
});
Expand All @@ -107,87 +121,29 @@ for (const { name, ipc, df } of config) {
}


for (const { name, df, counts } of config) {
// for (const { name, df, countBys, counts } of config) {
for (const { name, table, counts } of config) {
b.suite(
`DataFrame Iterate`,
`Table Iterate`,

b.add(`dataset: ${name}, numRows: ${formatNumber(df.numRows)}`, () => {
for (const _value of df) { }
b.add(`dataset: ${name}, numRows: ${formatNumber(table.numRows)}`, () => {
for (const _value of table) { }
}),

b.cycle(cycle)
);

// b.suite(
// `DataFrame Count By`,

// ...countBys.map((column: string) => b.add(
// `dataset: ${name}, column: ${column}, numRows: ${formatNumber(df.numRows)}, type: ${df.schema.fields.find((c) => c.name === column)!.type}`,
// () => df.countBy(column)
// )),

// b.cycle(cycle)
// );

// b.suite(
// `DataFrame Filter-Scan Count`,

// ...counts.map(({ column, test, value }: { column: string; test: 'gt' | 'eq'; value: number | string }) => b.add(
// `dataset: ${name}, column: ${column}, numRows: ${formatNumber(df.numRows)}, type: ${df.schema.fields.find((c) => c.name === column)!.type}, test: ${test}, value: ${value}`,
// () => {
// let filteredDf: Arrow.FilteredDataFrame;
// if (test == 'gt') {
// filteredDf = df.filter(col(column).gt(value));
// } else if (test == 'eq') {
// filteredDf = df.filter(col(column).eq(value));
// } else {
// throw new Error(`Unrecognized test "${test}"`);
// }

// return () => filteredDf.count();
// }
// )),

// b.cycle(cycle)
// );

// b.suite(
// `DataFrame Filter-Iterate`,

// ...counts.map(({ column, test, value }: { column: string; test: 'gt' | 'eq'; value: number | string }) => b.add(
// `dataset: ${name}, column: ${column}, numRows: ${formatNumber(df.numRows)}, type: ${df.schema.fields.find((c) => c.name === column)!.type}, test: ${test}, value: ${value}`,
// () => {
// let filteredDf: Arrow.FilteredDataFrame;
// if (test == 'gt') {
// filteredDf = df.filter(col(column).gt(value));
// } else if (test == 'eq') {
// filteredDf = df.filter(col(column).eq(value));
// } else {
// throw new Error(`Unrecognized test "${test}"`);
// }

// return () => {
// for (const _value of filteredDf) { }
// };
// }
// )),

// b.cycle(cycle)
// );

b.suite(
`DataFrame Direct Count`,
`Table Direct Count`,

...counts.map(({ column, test, value }: { column: string; test: 'gt' | 'eq'; value: number | string }) => b.add(
`dataset: ${name}, column: ${column}, numRows: ${formatNumber(df.numRows)}, type: ${df.schema.fields.find((c) => c.name === column)!.type}, test: ${test}, value: ${value}`,
`dataset: ${name}, column: ${column}, numRows: ${formatNumber(table.numRows)}, type: ${table.schema.fields.find((c) => c.name === column)!.type}, test: ${test}, value: ${value}`,
() => {
const colidx = df.schema.fields.findIndex((c) => c.name === column);
const colidx = table.schema.fields.findIndex((c) => c.name === column);

if (test == 'gt') {
return () => {
let sum = 0;
const batches = df.batches;
const batches = table.batches;
const numBatches = batches.length;
for (let batchIndex = -1; ++batchIndex < numBatches;) {
// load batches
Expand All @@ -203,7 +159,7 @@ for (const { name, df, counts } of config) {
} else if (test == 'eq') {
return () => {
let sum = 0;
const batches = df.batches;
const batches = table.batches;
const numBatches = batches.length;
for (let batchIndex = -1; ++batchIndex < numBatches;) {
// load batches
Expand Down
23 changes: 0 additions & 23 deletions js/src/Arrow.dom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,39 +56,16 @@ export {
FixedSizeList,
Map_, MapRow,
Table,
// Column,
Schema, Field,
Visitor,
Vector, makeVector, vectorFromArray,
// BaseVector,
// BinaryVector,
// BoolVector,
// Chunked,
// DateVector, DateDayVector, DateMillisecondVector,
// DecimalVector,
// DictionaryVector,
// FixedSizeBinaryVector,
// FixedSizeListVector,
// FloatVector, Float16Vector, Float32Vector, Float64Vector,
// IntervalVector, IntervalDayTimeVector, IntervalYearMonthVector,
// IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector, Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector,
// ListVector,
// MapVector,
// NullVector,
// StructVector,
// TimestampVector, TimestampSecondVector, TimestampMillisecondVector, TimestampMicrosecondVector, TimestampNanosecondVector,
// TimeVector, TimeSecondVector, TimeMillisecondVector, TimeMicrosecondVector, TimeNanosecondVector,
// UnionVector, DenseUnionVector, SparseUnionVector,
// Utf8Vector,
ByteStream, AsyncByteStream, AsyncByteQueue, ReadableSource, WritableSink,
RecordBatchReader, RecordBatchFileReader, RecordBatchStreamReader, AsyncRecordBatchFileReader, AsyncRecordBatchStreamReader,
RecordBatchWriter, RecordBatchFileWriter, RecordBatchStreamWriter, RecordBatchJSONWriter,
MessageReader, AsyncMessageReader, JSONMessageReader,
Message,
RecordBatch,
ArrowJSONLike, FileHandle, Readable, Writable, ReadableWritable, ReadableDOMStreamOptions,
// DataFrame, FilteredDataFrame, CountByResult, BindFunc, NextFunc,
// predicate,
util,
Builder,
BinaryBuilder,
Expand Down
3 changes: 0 additions & 3 deletions js/src/Arrow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,18 +88,15 @@ export { MessageReader, AsyncMessageReader, JSONMessageReader } from './ipc/mess
export { Message } from './ipc/metadata/message';
export { RecordBatch } from './recordbatch';
export { ArrowJSONLike, FileHandle, Readable, Writable, ReadableWritable, ReadableDOMStreamOptions } from './io/interfaces';
// export { DataFrame, FilteredDataFrame, CountByResult, BindFunc, NextFunc } from './compute/dataframe';

import * as util_bn_ from './util/bn';
import * as util_int_ from './util/int';
import * as util_bit_ from './util/bit';
import * as util_math_ from './util/math';
import * as util_buffer_ from './util/buffer';
import * as util_vector_ from './util/vector';
// import * as predicate from './compute/predicate';
import { compareSchemas, compareFields, compareTypes } from './visitor/typecomparator';

// export { predicate };
/** @ignore */
export const util = {
...util_bn_,
Expand Down
Loading