Skip to content

Commit 0a672bc

Browse files
trxcllntBrian Hulette
authored andcommitted
ARROW-2226, ARROW-2233: [JS] Dictionary bugfixes
Resolves https://issues.apache.org/jira/browse/ARROW-2226 Author: Paul Taylor <paul.e.taylor@me.com> Author: Brian Hulette <brian.hulette@ccri.com> Closes apache#1671 from trxcllnt/js-fix-dictionary-data and squashes the following commits: ccecf55 <Paul Taylor> Merge pull request #5 from TheNeuralBit/dictionary-vector-tests 3fb9a26 <Brian Hulette> Fix bug in DictionaryVector with nullable indices 2888657 <Brian Hulette> Add dictionary vector unit tests b0a0c08 <Paul Taylor> use indicies.offset in DictionaryData constructor
1 parent 524b522 commit 0a672bc

File tree

3 files changed

+59
-8
lines changed

3 files changed

+59
-8
lines changed

js/src/data.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,9 @@ export class DictionaryData<T extends DataType> extends BaseData<Dictionary<T>>
152152
public get indices() { return this._indices; }
153153
public get dictionary() { return this._dictionary; }
154154
constructor(type: Dictionary<T>, dictionary: Vector<T>, indices: Data<Int<any>>) {
155-
super(type, indices.length, (indices as any)._nullCount);
155+
super(type, indices.length, indices.offset, (indices as any)._nullCount);
156156
this._indices = indices;
157157
this._dictionary = dictionary;
158-
this.length = this._indices.length;
159158
}
160159
public get nullCount() { return this._indices.nullCount; }
161160
public get nullBitmap() { return this._indices.nullBitmap; }

js/src/vector.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,9 @@ export class DictionaryVector<T extends DataType = DataType> extends Vector<Dict
399399
public readonly dictionary: Vector<T>;
400400
constructor(data: Data<Dictionary<T>>, view: View<Dictionary<T>> = new DictionaryView<T>(data.dictionary, new IntVector(data.indices))) {
401401
super(data as Data<any>, view);
402+
if (view instanceof ValidityView) {
403+
view = (view as any).view;
404+
}
402405
if (data instanceof DictionaryData && view instanceof DictionaryView) {
403406
this.indices = view.indices;
404407
this.dictionary = data.dictionary;

js/test/unit/vector-tests.ts

Lines changed: 55 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@
1717

1818
import { TextEncoder } from 'text-encoding-utf-8';
1919
import Arrow from '../Arrow';
20-
import { type, TypedArray, TypedArrayConstructor } from '../../src/Arrow';
20+
import { type, TypedArray, TypedArrayConstructor, Vector } from '../../src/Arrow';
21+
import { packBools } from '../../src/util/bit'
2122

2223
const utf8Encoder = new TextEncoder('utf-8');
2324

24-
const { BoolData, FlatData, FlatListData } = Arrow.data;
25-
const { IntVector, FloatVector, BoolVector, Utf8Vector } = Arrow.vector;
25+
const { BoolData, FlatData, FlatListData, DictionaryData } = Arrow.data;
26+
const { IntVector, FloatVector, BoolVector, Utf8Vector, DictionaryVector } = Arrow.vector;
2627
const {
27-
Utf8, Bool,
28+
Dictionary, Utf8, Bool,
2829
Float16, Float32, Float64,
2930
Int8, Int16, Int32, Int64,
3031
Uint8, Uint16, Uint32, Uint64,
@@ -310,6 +311,54 @@ describe(`Utf8Vector`, () => {
310311
let offset = 0;
311312
const offsets = Uint32Array.of(0, ...values.map((d) => { offset += d.length; return offset; }));
312313
const vector = new Utf8Vector(new FlatListData(new Utf8(), n, null, offsets, utf8Encoder.encode(values.join(''))));
314+
basicVectorTests(vector, values, ['abc', '123']);
315+
describe(`sliced`, () => {
316+
basicVectorTests(vector.slice(1,3), values.slice(1,3), ['foo', 'abc']);
317+
});
318+
});
319+
320+
describe(`DictionaryVector`, () => {
321+
const dictionary = ['foo', 'bar', 'baz'];
322+
const extras = ['abc', '123']; // values to search for that should NOT be found
323+
let offset = 0;
324+
const offsets = Uint32Array.of(0, ...dictionary.map((d) => { offset += d.length; return offset; }));
325+
const dictionary_vec = new Utf8Vector(new FlatListData(new Utf8(), dictionary.length, null, offsets, utf8Encoder.encode(dictionary.join(''))));
326+
327+
const indices = Array.from({length: 50}, () => Math.random() * 3 | 0);
328+
329+
describe(`index with nullCount == 0`, () => {
330+
const indices_data = new FlatData(new Int32(), indices.length, new Uint8Array(0), indices);
331+
332+
const values = Array.from(indices).map((d) => dictionary[d]);
333+
const vector = new DictionaryVector(new DictionaryData(new Dictionary(dictionary_vec.type, indices_data.type), dictionary_vec, indices_data));
334+
335+
basicVectorTests(vector, values, extras);
336+
337+
describe(`sliced`, () => {
338+
basicVectorTests(vector.slice(10, 20), values.slice(10,20), extras);
339+
})
340+
});
341+
342+
describe(`index with nullCount > 0`, () => {
343+
const validity = Array.from({length: indices.length}, () => Math.random() > 0.2 ? true : false);
344+
const indices_data = new FlatData(new Int32(), indices.length, packBools(validity), indices, 0, validity.reduce((acc, d) => acc + (d ? 0 : 1), 0));
345+
const values = Array.from(indices).map((d, i) => validity[i] ? dictionary[d] : null);
346+
const vector = new DictionaryVector(new DictionaryData(new Dictionary(dictionary_vec.type, indices_data.type), dictionary_vec, indices_data));
347+
348+
basicVectorTests(vector, values, ['abc', '123']);
349+
describe(`sliced`, () => {
350+
basicVectorTests(vector.slice(10, 20), values.slice(10,20), extras);
351+
});
352+
});
353+
});
354+
355+
// Creates some basic tests for the given vector.
356+
// Verifies that:
357+
// - `get` and the native iterator return the same data as `values`
358+
// - `indexOf` returns the same indices as `values`
359+
function basicVectorTests(vector: Vector, values: any[], extras: any[]) {
360+
const n = values.length;
361+
313362
test(`gets expected values`, () => {
314363
let i = -1;
315364
while (++i < n) {
@@ -325,14 +374,14 @@ describe(`Utf8Vector`, () => {
325374
}
326375
});
327376
test(`indexOf returns expected values`, () => {
328-
let testValues = values.concat(['abc', '12345']);
377+
let testValues = values.concat(extras);
329378

330379
for (const value of testValues) {
331380
const expected = values.indexOf(value);
332381
expect(vector.indexOf(value)).toEqual(expected);
333382
}
334383
});
335-
});
384+
}
336385

337386
function toMap<T>(entries: Record<string, T>, keys: string[]) {
338387
return keys.reduce((map, key) => {

0 commit comments

Comments
 (0)