Closed
Description
a [very-crude] demo to import/export large CSVs exists @ https://kaizhu256.github.io/demo-sqljs-csv/
open the dev-console and copy-paste the following code to reproduce the million-row result in the screenshot:
(async function () {
    "use strict";

    /**
     * Return one element of <list> chosen uniformly at random.
     * @param {Array} list - non-empty array of candidates
     * @returns {*} a randomly selected element
     */
    function randomSelect(list) {
        return list[Math.floor(list.length * Math.random())];
    }

    // Hoist the candidate-name arrays out of the loop so they are not
    // re-allocated a million times.
    const firstNames = ["Bob", "Jane", "John"];
    const lastNames = ["Doe", "Smith", "Williams"];

    // Collect rows in an array and join once at the end. Repeated
    // `csv += ...` on a multi-megabyte string can degrade to quadratic
    // copying; a single join is linear.
    const rows = ["rowid,name,address,random\n"];
    let ii = 0;
    // generate million-row csv
    while (ii < 1000000) {
        rows.push(
            ii + 1 + ","
            + randomSelect(firstNames) + " " + randomSelect(lastNames) + ","
            + "\"1234 Main St., Los Angeles, CA 90001\","
            + Math.random() + "\n"
        );
        ii += 1;
    }
    const csv = rows.join("");

    console.error(csv.slice(0, 1000));
    // rowid,name,address,random
    // 1,Jane Doe,"1234 Main St., Los Angeles, CA 90001",0.8783498663648375
    // 2,Bob Williams,"1234 Main St., Los Angeles, CA 90001",0.22973214766766303
    // ...
    // 1000000,Jane Williams,"1234 Main St., Los Angeles, CA 90001",0.43105992922801883

    // ingest the csv into a sqlite table (demo helper provided by the page)
    await window.sqljsTableImport({
        csv,
        tableName: "table1"
    });
    // sqljsTableImport - 945 ms - inserted 12,572 rows - 1 MB
    // ...
    // sqljsTableImport - 6252 ms - inserted 1,000,000 rows - 81 MB

    // query the imported table
    const result = await window.sqljsExec(
        "SELECT * FROM table1 WHERE\n"
        + "name = 'John Doe'\n"
        + "AND random > 0.5\n"
        + "ORDER BY random DESC\n"
        + "LIMIT 1000;"
    );
    console.error(result.results[0].values);
    // ["961621", "John Doe", "1234 Main St., Los Angeles, CA 90001", "0.999 ...
    // ["51800", "John Williams ", "1234 Main St., Los Angeles, CA 90001", "0.999 ...
    // ...
}());
the idea is to add 2 new worker-only functions workerTableImport
and workerTableExport
- workerTableImport - can ingest a large csv, json list-of-list, or json list-of-object into a sqlite-temp-table [1], with a progress-callback to give the user feedback on the progress of ingestion.
- workerTableExport - can export a sqlite-table as a csv, or json-list-of-object (json-list-of-list already covered by exec-command) [2].
also, @lovasoa wants to revamp workers at the same time, #364 (comment).
perhaps we could create a new Worker class re-using most of existing code:
function Worker (sqljsWorkerScriptUrl) {...}
Worker.prototype.postMessage = function ({ action, sql, params }) {...};
Worker.prototype.tableExport = function ({ tableName, format }) {...};
Worker.prototype.tableImport = function ({ tableName, data, format, onProgress }) {...};
[1] source-code for function sqljsTableImport
https://github.com/kaizhu256/demo-sqljs-csv/blob/c4337fa54d04c0fec067dd7ea41e3744277eca74/assets.sqljs-v2019.11.5.extra.js#L210
[2] source-code for function sqljsTableExport
https://github.com/kaizhu256/demo-sqljs-csv/blob/c4337fa54d04c0fec067dd7ea41e3744277eca74/assets.sqljs-v2019.11.5.extra.js#L55