1616// under the License.
1717
1818// Use the ES5 UMD target as perf baseline
19- // const { Table, readVectors } = require('../targets/es5/umd');
20- // const { Table, readVectors } = require('../targets/es5/cjs');
21- const { Table, readVectors } = require ( '../targets/es2015/umd' ) ;
22- // const { Table, readVectors } = require('../targets/es2015/cjs');
19+ // const { col, Table, read: readBatches } = require('../targets/es5/umd');
20+ // const { col, Table, read: readBatches } = require('../targets/es5/cjs');
21+ // const { col, Table, read: readBatches } = require('../targets/es2015/umd');
22+ const { col , Table, read : readBatches } = require ( '../targets/es2015/cjs' ) ;
2323
24- const config = require ( './config' ) ;
2524const Benchmark = require ( 'benchmark' ) ;
2625
2726const suites = [ ] ;
2827
// Vector-level suites: two parse benchmarks for every configured table, then
// one suite per column for get-by-index, iteration, slice + toArray and slice
// (registered in exactly that order).
for (const { name, buffers } of require('./table_config')) {
    const parseSuiteName = `Parse "${name} "`;
    const sliceSuiteName = `Slice "${name} " vectors`;
    const iterateSuiteName = `Iterate "${name} " vectors`;
    const getByIndexSuiteName = `Get "${name} " values by index`;
    const sliceToArraySuiteName = `Slice toArray "${name} " vectors`;

    suites.push(createTestSuite(parseSuiteName, createFromTableTest(name, buffers)));
    suites.push(createTestSuite(parseSuiteName, createReadBatchesTest(name, buffers)));

    const table = Table.from(buffers);

    // Builds one suite per column; the column's field name is looked up by position in the schema.
    const suitesPerColumn = (suiteName, createTest) => {
        return table.columns.map((vector, i) => {
            return createTestSuite(suiteName, createTest(vector, table.schema.fields[i].name));
        });
    };

    suites.push(...suitesPerColumn(getByIndexSuiteName, createGetByIndexTest));
    suites.push(...suitesPerColumn(iterateSuiteName, createIterateTest));
    suites.push(...suitesPerColumn(sliceToArraySuiteName, createSliceToArrayTest));
    suites.push(...suitesPerColumn(sliceSuiteName, createSliceTest));
}

// DataFrame-level suites: one count-by suite per entry in `countBys`, then a
// filter-scan count and a direct count suite per entry in `counts`.
for (const { name, buffers, countBys, counts } of require('./table_config')) {
    const table = Table.from(buffers);

    const dfCountBySuiteName = `DataFrame Count By "${name} "`;
    const dfFilterCountSuiteName = `DataFrame Filter-Scan Count "${name} "`;
    const dfDirectCountSuiteName = `DataFrame Direct Count "${name} "`;

    suites.push(...countBys.map((countBy) => {
        return createTestSuite(dfCountBySuiteName, createDataFrameCountByTest(table, countBy));
    }));
    // `col` is renamed to `column` while destructuring so it does not shadow the imported `col` helper.
    suites.push(...counts.map(({ col: column, test, value }) => {
        return createTestSuite(dfFilterCountSuiteName, createDataFrameFilterCountTest(table, column, test, value));
    }));
    suites.push(...counts.map(({ col: column, test, value }) => {
        return createTestSuite(dfDirectCountSuiteName, createDataFrameDirectCountTest(table, column, test, value));
    }));
}
4354
4455console . log ( 'Running apache-arrow performance tests...\n' ) ;
@@ -52,7 +63,7 @@ function run() {
5263 var str = x . toString ( ) ;
5364 var meanMsPerOp = Math . round ( x . stats . mean * 100000 ) / 100 ;
5465 var sliceOf60FPS = Math . round ( ( meanMsPerOp / ( 1000 / 60 ) ) * 100000 ) / 1000 ;
55- return `${ str } ( avg: ${ meanMsPerOp } ms, or ${ sliceOf60FPS } % of a frame @ 60FPS) ${ x . suffix || '' } ` ;
66+ return `${ str } \n avg: ${ meanMsPerOp } ms\n ${ sliceOf60FPS } % of a frame @ 60FPS ${ x . suffix || '' } ` ;
5667 } ) . join ( '\n' ) + '\n' ) ;
5768 if ( suites . length > 0 ) {
5869 setTimeout ( run , 1000 ) ;
@@ -61,51 +72,141 @@ function run() {
6172 . run ( { async : true } ) ;
6273}
6374
/**
 * Creates an asynchronous Benchmark suite containing exactly one test.
 *
 * @param {string} name - Title passed to the `Benchmark.Suite` constructor.
 * @param {Object} test - Test descriptor passed to the suite's `add` method.
 * @returns {*} The value returned by `add` (presumably the suite itself, for chaining — confirm against Benchmark.js).
 */
function createTestSuite(name, test) {
    const suite = new Benchmark.Suite(name, { async: true });
    return suite.add(test);
}
78+
/**
 * Builds the benchmark descriptor that times `Table.from` on the given buffers.
 *
 * @param {string} name - Dataset label; not read by this function.
 * @param {*} buffers - Input handed unchanged to `Table.from` on every run.
 * @returns {{async: boolean, name: string, fn: Function}} Test descriptor.
 */
function createFromTableTest(name, buffers) {
    // Each run stores its result here; the value is never read back.
    let parsed;
    const descriptor = {
        async: true,
        name: `Table.from\n `,
        fn() {
            parsed = Table.from(buffers);
        }
    };
    return descriptor;
}
7287
/**
 * Builds the benchmark descriptor that times a full pass over everything
 * `readBatches` yields for the given buffers.
 *
 * @param {string} name - Dataset label; not read by this function.
 * @param {*} buffers - Input handed unchanged to `readBatches` on every run.
 * @returns {{async: boolean, name: string, fn: Function}} Test descriptor.
 */
function createReadBatchesTest(name, buffers) {
    // Each yielded batch is stored here; the value is never read back.
    let lastBatch;
    const descriptor = {
        async: true,
        name: `readBatches\n `,
        fn() {
            // Drain the iterable; the loop exists only to force every batch to be produced.
            for (const batch of readBatches(buffers)) {
                lastBatch = batch;
            }
        }
    };
    return descriptor;
}
8196
/**
 * Builds the benchmark descriptor that times `vector.slice()`.
 *
 * @param {Object} vector - Column under test; its `length`, `type` and `slice()` are used.
 * @param {string} name - Column name shown in the test label.
 * @returns {{async: boolean, name: string, fn: Function}} Test descriptor.
 */
function createSliceTest(vector, name) {
    // Each run stores its result here; the value is never read back.
    let sliced;
    const descriptor = {
        async: true,
        name: `name: '${name} ', length: ${vector.length} , type: ${vector.type} \n `,
        fn() {
            sliced = vector.slice();
        }
    };
    return descriptor;
}
90105
91- function createIterateTest ( vector ) {
/**
 * Builds the benchmark descriptor that times `vector.slice().toArray()`.
 *
 * @param {Object} vector - Column under test; its `length`, `type` and `slice()` are used.
 * @param {string} name - Column name shown in the test label.
 * @returns {{async: boolean, name: string, fn: Function}} Test descriptor.
 */
function createSliceToArrayTest(vector, name) {
    // Each run stores its result here; the value is never read back.
    let materialized;
    const descriptor = {
        async: true,
        name: `name: '${name} ', length: ${vector.length} , type: ${vector.type} \n`,
        fn() {
            const sliced = vector.slice();
            materialized = sliced.toArray();
        }
    };
    return descriptor;
}
114+
/**
 * Builds the benchmark descriptor that times a full `for...of` pass over a vector.
 *
 * @param {Object} vector - Iterable column under test; its `length` and `type` appear in the label.
 * @param {string} name - Column name shown in the test label.
 * @returns {{async: boolean, name: string, fn: Function}} Test descriptor.
 */
function createIterateTest(vector, name) {
    // Each visited element is stored here; the value is never read back.
    let current;
    const descriptor = {
        async: true,
        name: `name: '${name} ', length: ${vector.length} , type: ${vector.type} \n `,
        fn() {
            for (const element of vector) {
                current = element;
            }
        }
    };
    return descriptor;
}
99123
/**
 * Builds the benchmark descriptor that times reading every element of a
 * vector through `vector.get(i)`, from index 0 up to `vector.length - 1`.
 *
 * @param {Object} vector - Column under test; its `length`, `type` and `get()` are used.
 * @param {string} name - Column name shown in the test label.
 * @returns {{async: boolean, name: string, fn: Function}} Test descriptor.
 */
function createGetByIndexTest(vector, name) {
    // Each fetched element is stored here; the value is never read back.
    let current;
    const descriptor = {
        async: true,
        name: `name: '${name} ', length: ${vector.length} , type: ${vector.type} \n `,
        fn() {
            // The length is read once per run, before the first access.
            const count = vector.length;
            for (let i = 0; i < count; i++) {
                current = vector.get(i);
            }
        }
    };
    return descriptor;
}
136+
/**
 * Builds the benchmark descriptor that counts matching rows "by hand": it
 * walks every record batch of `table` and reads the chosen column value by
 * value through `vector.get(index)`.
 *
 * @param {Object} table - Table under test; `schema.fields`, `batches`, `columns` and `numRows` are read.
 * @param {string} column - Name of the column to scan, matched against `schema.fields[].name`.
 * @param {string} test - Comparison to apply: `'gteq'` (value >= `value`) or `'eq'` (value === `value`).
 * @param {*} value - Right-hand operand of the comparison.
 * @returns {{async: boolean, name: string, fn: Function}} Test descriptor.
 * @throws {Error} If `test` is neither `'gteq'` nor `'eq'`.
 */
function createDataFrameDirectCountTest(table, column, test, value) {
    const colidx = table.schema.fields.findIndex((c) => c.name === column);
    // Running total of matches; reset at the start of each run and never read back.
    let sum;
    // Declared locally: assigning to an undeclared `op` leaks a global in sloppy
    // mode and throws a ReferenceError in strict / ES-module code.
    let op;

    // The two loops are deliberately kept as separate, fully inlined functions
    // so the comparison is not routed through a per-row callback.
    if (test === 'gteq') {
        op = function () {
            sum = 0;
            const batches = table.batches;
            const numBatches = batches.length;
            for (let batchIndex = -1; ++batchIndex < numBatches;) {
                // load batches
                const { numRows, columns } = batches[batchIndex];
                const vector = columns[colidx];
                // visit every row; the boolean result is coerced to 0 or 1 by `+=`
                for (let index = -1; ++index < numRows;) {
                    sum += (vector.get(index) >= value);
                }
            }
        };
    } else if (test === 'eq') {
        op = function () {
            sum = 0;
            const batches = table.batches;
            const numBatches = batches.length;
            for (let batchIndex = -1; ++batchIndex < numBatches;) {
                // load batches
                const { numRows, columns } = batches[batchIndex];
                const vector = columns[colidx];
                // visit every row; the boolean result is coerced to 0 or 1 by `+=`
                for (let index = -1; ++index < numRows;) {
                    sum += (vector.get(index) === value);
                }
            }
        };
    } else {
        throw new Error(`Unrecognized test "${test} "`);
    }

    return {
        async: true,
        name: `name: '${column} ', length: ${table.numRows} , type: ${table.columns[colidx].type} , test: ${test} , value: ${value} \n`,
        fn: op
    };
}
180+
/**
 * Builds the benchmark descriptor that times `table.countBy(column)`.
 *
 * @param {Object} table - Table under test; `schema.fields`, `columns`, `numRows` and `countBy()` are used.
 * @param {string} column - Name of the column to count by, matched against `schema.fields[].name`.
 * @returns {{async: boolean, name: string, fn: Function}} Test descriptor.
 */
function createDataFrameCountByTest(table, column) {
    // Position of the column in the schema; only used to look up its type for the label.
    const fieldIndex = table.schema.fields.findIndex((field) => field.name === column);
    const descriptor = {
        async: true,
        name: `name: '${column} ', length: ${table.numRows} , type: ${table.columns[fieldIndex].type} \n`,
        fn() {
            table.countBy(column);
        }
    };
    return descriptor;
}
192+
/**
 * Builds the benchmark descriptor that times `count()` on a filtered view of
 * `table`. The filter is built once, when this factory is called; each
 * benchmark run only calls `count()` on it.
 *
 * @param {Object} table - Table under test; `schema.fields`, `columns`, `numRows` and `filter()` are used.
 * @param {string} column - Name of the column to filter on.
 * @param {string} test - Predicate to build: `'gteq'` or `'eq'`.
 * @param {*} value - Operand handed to the predicate builder.
 * @returns {{async: boolean, name: string, fn: Function}} Test descriptor.
 * @throws {Error} If `test` is neither `'gteq'` nor `'eq'`.
 */
function createDataFrameFilterCountTest(table, column, test, value) {
    const colidx = table.schema.fields.findIndex((c) => c.name === column);
    let df;

    // Strict comparison: a non-string selector must not pick a branch through coercion.
    if (test === 'gteq') {
        df = table.filter(col(column).gteq(value));
    } else if (test === 'eq') {
        df = table.filter(col(column).eq(value));
    } else {
        throw new Error(`Unrecognized test "${test} "`);
    }

    return {
        async: true,
        name: `name: '${column} ', length: ${table.numRows} , type: ${table.columns[colidx].type} , test: ${test} , value: ${value} \n`,
        fn() {
            df.count();
        }
    };
}
0 commit comments