@@ -54,14 +54,17 @@ function columnsFromBatches(batches: Vector[][]) {
5454
5555export class Table implements DataFrame {
5656 static from ( sources ?: Iterable < Uint8Array | Buffer | string > | object | string ) {
57- let batches : Vector < any > [ ] [ ] = [ [ ] ] ;
57+ let batches : Vector [ ] [ ] = [ ] ;
5858 if ( sources ) {
59- batches = Array . from ( read ( sources ) ) ;
59+ batches = [ ] ;
60+ for ( let batch of read ( sources ) ) {
61+ batches . push ( batch ) ;
62+ }
6063 }
6164 return new Table ( { batches } ) ;
6265 }
6366 static async fromAsync ( sources ?: AsyncIterable < Uint8Array | Buffer | string > ) {
64- let batches : Vector < any > [ ] [ ] = [ [ ] ] ;
67+ let batches : Vector [ ] [ ] = [ ] ;
6568 if ( sources ) {
6669 batches = [ ] ;
6770 for await ( let batch of readAsync ( sources ) ) {
@@ -119,34 +122,34 @@ export class Table implements DataFrame {
119122 count_by = new Col ( count_by ) ;
120123 }
121124
122- // the last batch will have the most complete dictionary, use it's data
123- // vector as our count by keys
125+ // Assume that all dictionary batches are deltas, which means that the
126+ // last record batch has the most complete dictionary
124127 count_by . bind ( this . batches [ this . batches . length - 1 ] ) ;
125128 if ( ! ( count_by . vector instanceof DictionaryVector ) ) {
126- throw new Error ( " countBy currently only supports dictionary-encoded columns" ) ;
129+ throw new Error ( ' countBy currently only supports dictionary-encoded columns' ) ;
127130 }
128131
129- let keys : Vector = ( count_by . vector as DictionaryVector < any > ) . data ;
132+ let data : Vector = ( count_by . vector as DictionaryVector < any > ) . data ;
130133 // TODO: Adjust array byte width based on overall length
131134 // (e.g. if this.length <= 255 use Uint8Array, etc...)
132- let counts : Uint32Array = new Uint32Array ( keys . length ) ;
133-
135+ let counts : Uint32Array = new Uint32Array ( data . length ) ;
134136
135137 for ( let batch = - 1 ; ++ batch < this . lengths . length ; ) {
136138 const length = this . lengths [ batch ] ;
137139
138140 // load batches
139141 const columns = this . batches [ batch ] ;
140142 count_by . bind ( columns ) ;
143+ const keys : Vector = ( count_by . vector as DictionaryVector < any > ) . keys ;
141144
142145 // yield all indices
143146 for ( let idx = - 1 ; ++ idx < length ; ) {
144- let key = ( count_by . vector as DictionaryVector < any > ) . getKey ( idx )
147+ let key = keys . get ( idx ) ;
145148 if ( key !== null ) { counts [ key ] ++ ; }
146149 }
147150 }
148151
149- return new CountByResult ( keys , new Uint32Vector ( { data : counts } ) )
152+ return new CountByResult ( data , new Uint32Vector ( { data : counts } ) ) ;
150153 }
151154 * [ Symbol . iterator ] ( ) {
152155 for ( let batch = - 1 ; ++ batch < this . lengths . length ; ) {
@@ -220,16 +223,17 @@ class FilteredDataFrame implements DataFrame {
220223 count_by = new Col ( count_by ) ;
221224 }
222225
223- // the last batch will have the most complete dictionary, use it's data
224- // vector as our count by keys
226+ // Assume that all dictionary batches are deltas, which means that the
227+ // last record batch has the most complete dictionary
225228 count_by . bind ( this . parent . batches [ this . parent . batches . length - 1 ] ) ;
226229 if ( ! ( count_by . vector instanceof DictionaryVector ) ) {
227- throw new Error ( " countBy currently only supports dictionary-encoded columns" ) ;
230+ throw new Error ( ' countBy currently only supports dictionary-encoded columns' ) ;
228231 }
229232
230- let keys : Vector = ( count_by . vector as DictionaryVector < any > ) . data ;
231- let counts : Uint32Array = new Uint32Array ( keys . length ) ;
232-
233+ const data : Vector = ( count_by . vector as DictionaryVector < any > ) . data ;
234+ // TODO: Adjust array byte width based on overall length
235+ // (e.g. if this.length <= 255 use Uint8Array, etc...)
236+ const counts : Uint32Array = new Uint32Array ( data . length ) ;
233237
234238 for ( let batch = - 1 ; ++ batch < this . parent . lengths . length ; ) {
235239 const length = this . parent . lengths [ batch ] ;
@@ -238,28 +242,29 @@ class FilteredDataFrame implements DataFrame {
238242 const columns = this . parent . batches [ batch ] ;
239243 const predicate = this . predicate . bind ( columns ) ;
240244 count_by . bind ( columns ) ;
245+ const keys : Vector = ( count_by . vector as DictionaryVector < any > ) . keys ;
241246
242247 // yield all indices
243248 for ( let idx = - 1 ; ++ idx < length ; ) {
244- let key = ( count_by . vector as DictionaryVector < any > ) . getKey ( idx )
249+ let key = keys . get ( idx ) ;
245250 if ( key !== null && predicate ( idx , columns ) ) { counts [ key ] ++ ; }
246251 }
247252 }
248253
249- return new CountByResult ( keys , new Uint32Vector ( { data : counts } ) )
254+ return new CountByResult ( data , new Uint32Vector ( { data : counts } ) ) ;
250255 }
251256}
252257
253258export class CountByResult extends Table implements DataFrame {
254- constructor ( readonly keys : Vector , readonly counts : Vector < number | null > ) {
255- super ( { batches : [ [ keys , counts ] ] } ) ;
259+ constructor ( readonly values : Vector , readonly counts : Vector < number | null > ) {
260+ super ( { batches : [ [ values , counts ] ] } ) ;
256261 }
257262
258263 asJSON ( ) : Object {
259264 let result : { [ key : string ] : number | null } = { } ;
260265
261266 for ( let i = - 1 ; ++ i < this . length ; ) {
262- result [ this . keys . get ( i ) ] = this . counts . get ( i ) ;
267+ result [ this . values . get ( i ) ] = this . counts . get ( i ) ;
263268 }
264269
265270 return result ;
0 commit comments