diff --git a/.eslintrc.js b/.eslintrc.js new file mode 100644 index 0000000..5f63b0c --- /dev/null +++ b/.eslintrc.js @@ -0,0 +1,17 @@ +module.exports = { + "env": { + "commonjs": true, + "es6": true, + "node": true + }, + "extends": "eslint:recommended", + "globals": { + "Atomics": "readonly", + "SharedArrayBuffer": "readonly" + }, + "parserOptions": { + "ecmaVersion": 2018 + }, + "rules": { + } +}; \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..6eabdcd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,20 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' + +--- + +**Describe the bug** + +A clear and concise description of what the bug is, the CSV package version you are using. + +**To Reproduce** + +Please provide us with a unit test, an example code or even pseudo-code. It could be written in JavaScript or CoffeeScript, it doesn't matter. What's important is to limit the data to the minimum as well as to strip down the number of options to the only ones with an impact. + +**Additional context** + +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bf1164f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,24 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** + +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** + +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** + +A clear and concise description of any alternative solutions or features you've considered. 
+ +**Additional context** + +Add any other context or screenshots about the feature request here. diff --git a/.travis.yml b/.travis.yml index d831dcb..0a6e80c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,6 @@ language: node_js node_js: - - "8" - "10" - "11" + - "12" + - "14" diff --git a/CHANGELOG.md b/CHANGELOG.md index 02eee8a..2b452bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,9 +3,182 @@ ## Todo -* skip_lines_with_error: rename to skip_records_with_error -* max_comment_size: new option +* `skip_lines_with_empty_values`: rename to skip_records_with_empty_values +* `skip_lines_with_error`: rename to skip_records_with_error +* `relax`: rename to relax_quotes_when_unquoted +* `max_comment_size`: new option * promise: new API module +* errors: finish normalisation of all errors + +## Version 4.12.0 + +New feature: +* ts: error types +* ts: support camelcase options (fix #287) + +## Version 4.11.1 + +New feature: +* escape: disabled when null or false + +Project management: +* travis: test node version 14 + +## Version 4.11 + +Project management: +* mistake in the release + +## Version 4.10.1 + +Minor improvements: +* columns_duplicates_to_array: error and type + +## Version 4.10.0 + +New feature: +* columns_duplicates_to_array: new option + +Project management: +* samples: new file recipie + +## Version 4.9.1 + +Minor improvements: +* delimiter: update ts definition +* delimiter: new sample + +## Version 4.9.0 + +New Feature: +* delimiter: accept multiple values + +## Version 4.8.9 + +Fix: +* sync: disregard emitted null records + +New Feature: +* trim: support form feed character + +Minor improvements: +* src: cache length in loops +* trim: new sample +* to_line: simple sample +* comment: simple sample +* bom: sample with hidden bom +* bom: test behavior with the column option + +## Version 4.8.8 + +* api: fix regression in browser environments + +## Version 4.8.7 + +* api: fix input string with output stream + +## Version 4.8.6 + +* on_record: 
catch and handle user errors + +## Version 4.8.5 + +* ts: fix `types` declaration + +## Version 4.8.4 + +* ts: fix `types` declaration to a single file + +## Version 4.8.3 + +* `errors`: handle undefined captureStackTrace + +## Version 4.8.2 + +* `relax_column_count`: ts definitions for less and more + +## Version 4.8.1 + +* package: move pad dependency to dev + +## Version 4.8.0 + +* `relax_column_count`: new less and more options +* columns: skip empty records before detecting headers +* errors: rename `CSV_INCONSISTENT_RECORD_LENGTH` +* errors: rename `CSV_RECORD_DONT_MATCH_COLUMNS_LENGTH` + +## Version 4.7.0 + +New Feature: +* `on_record`: user function to alter and filter records + +Minor improvements: +* test: ensure every sample is valid +* `from_line`: honours inferred column names +* `from_line`: new sample +* errors: expose `CSV_INVALID_ARGUMENT` +* errors: expose `CSV_INVALID_COLUMN_DEFINITION` +* errors: expose `CSV_OPTION_COLUMNS_MISSING_NAME` +* errors: expose `CSV_INVALID_OPTION_BOM` +* errors: expose `CSV_INVALID_OPTION_CAST` +* errors: expose `CSV_INVALID_OPTION_CAST_DATE` +* errors: expose `CSV_INVALID_OPTION_COLUMNS` +* errors: expose `CSV_INVALID_OPTION_COMMENT` +* errors: expose `CSV_INVALID_OPTION_DELIMITER` +* error: fix call to supper + +Project management: +* package: contributing +* package: code of conduct + +## Version 4.6.5 + +* context: column is null when cast force the context creation, fix #260 + +## Version 4.6.4 + +* errors: don't stringify/parse undefined and null values +* errors: expose `CSV_NON_TRIMABLE_CHAR_AFTER_CLOSING_QUOTE` +* errors: expose `CSV_MAX_RECORD_SIZE` + +## Version 4.6.3 + +* lint: integrate eslint + +## Version 4.6.2 + +* context: null column when columns number inferior to record length + +## Version 4.6.1 + +* src: set const in for loop + +## Version 4.6.0 + +* `skip_lines_with_empty_values`: handle non string value +* errors: add context information +* tests: new error assertion framework +* buffer: 
serialize to json as string +* errors: expose `INVALID_OPENING_QUOTE` + +## Version 4.5.0 + +* errors: start normalizing errors with unique codes and context +* errors: expose `CSV_INVALID_CLOSING_QUOTE` +* errors: expose `CSV_QUOTE_NOT_CLOSED` +* errors: expose `CSV_INVALID_RECORD_LENGTH_DONT_PREVIOUS_RECORDS` +* errors: expose `CSV_INVALID_RECORD_LENGTH_DONT_MATCH_COLUMNS` +* errors: expose `CSV_INVALID_COLUMN_MAPPING` + +## Version 4.4.7 + +* travis: remove node.js 8 and add 12 +* destroy: test inside readable event + +## Version 4.4.6 + +* security: remove regexp vulnerable to DOS in cast option, npm report 69742 ## Version 4.4.5 @@ -15,7 +188,7 @@ * package: latest dependencies * bom: detection when buffer smaller than bom -* package: remove deprecated @types/should dependency +* package: remove deprecated `@types/should` dependency * package: update file path ## Version 4.4.3 @@ -24,25 +197,25 @@ ## Version 4.4.2 -* bom: parsing for BOM character #239 +* `bom`: parsing for BOM character #239 * ts: add sync definition * package: replace npm ignore with file field ## Version 4.4.1 Fix: -* columns: allows returning an array of string, undefined, null or false +* `columns`: allows returning an array of string, undefined, null or false ## Version 4.4.0 New features: -* options: new bom option +* options: new `bom` option ## Version 4.3.4 -* columns: enrich error message when provided as literal object -* cast: handle undefined columns -* skip_lines_with_error: new sample +* `columns`: enrich error message when provided as literal object +* `cast`: handle undefined columns +* `skip_lines_with_error`: new sample ## Version 4.3.3 @@ -60,52 +233,52 @@ Minor enhancements: * ts: distribute definitions with es5 * ts: unused MatcherFunc type -Project managements: +Project management: * babel: include .babelrc to git ## Version 4.3.0 New features: -* objname: accept a buffer +* `objname`: accept a buffer Minor enhancements: -* to_line: validation refinements -* trim, 
ltrim, rtrim: validation refinements -* to: validation refinements -* from_line: validation refinements -* objname: validation refinements -* from: validation refinements -* escape: validation refinements -* skip_empty_lines: validation refinements -* skip_lines_with_empty_values: validation refinements -* skip_lines_with_error: validation refinements -* relax_column_count: validation refinements -* relax: validation refinements -* delimiter: validation refinements -* max_record_size: validation refinements +* `to_line`: validation refinements +* `trim`, ltrim, rtrim: validation refinements +* `to`: validation refinements +* `from_line`: validation refinements +* `objname`: validation refinements +* `from`: validation refinements +* `escape`: validation refinements +* `skip_empty_lines`: validation refinements +* `skip_lines_with_empty_values`: validation refinements +* `skip_lines_with_error`: validation refinements +* `relax_column_count`: validation refinements +* `relax`: validation refinements +* `delimiter`: validation refinements +* `max_record_size`: validation refinements ## Version 4.2.0 Fix: -* record_delimiter: fix multi bytes with skip_empty_lines and from_line -* rtrim: accept tab +* `record_delimiter`: fix multi bytes with `skip_empty_lines` and `from_line` +* `rtrim`: accept tab ## Version 4.1.0 New features: * options: accept snake case and camel case -* cast: dont call cast for non column-mappable fields +* `cast`: dont call cast for non column-mappable fields Fix: -* cast: ensure column is a string and not an array +* `cast`: ensure column is a string and not an array * stream: handle empty input streams -* cast: function may return non-string values +* `cast`: function may return non-string values * stream: pass stream options without modification ## Version 4.0.1 Fix: -* relax_column_count: handle records with more columns +* `relax_column_count`: handle records with more columns ## Version 4.0.0 @@ -129,23 +302,23 @@ This is a complete rewrite 
based with a Buffer implementation. There are no majo New features: * new options `info`, `from_line` and `to_line` -* trim: respect `ltrim` and `rtrim` when defined -* delimiter: may be a Buffer -* delimiter: handle multiple bytes/characters +* `trim`: respect `ltrim` and `rtrim` when defined +* `delimiter`: may be a Buffer +* `delimiter`: handle multiple bytes/characters * callback: export info object as third argument -* cast: catch error in user functions +* `cast`: catch error in user functions * ts: mark info as readonly with required properties -* comment_lines: count the number of commented lines with no records +* `comment_lines`: count the number of commented lines with no records * callback: pass undefined instead of null -API management +API management: * Multiple tests have been rewritten with easier data sample * Source code is now written in ES6 instead of CoffeeScript * package: switch to MIT license ## Version 3.2.0 -* max_limit_on_data_read: update error msg +* `max_limit_on_data_read`: update error msg * src: simplify detection for more data * lines: test empty line account for 1 line * options: extract default options @@ -155,8 +328,8 @@ API management ## Version 3.1.3 -* rowDelimiter: fix overlap with delimiter -* internal: rename rowDelimiterLength to rowDelimiterMaxLength +* `rowDelimiter`: fix overlap with delimiter +* internal: rename rowDelimiterLength to `rowDelimiterMaxLength` ## Version 3.1.2 @@ -179,7 +352,7 @@ API management ## Version 3.0.0 Breaking changes: -* columns: skip empty values when null, false or undefined +* `columns`: skip empty values when null, false or undefined Cleanup: * sync: refactor internal variables @@ -191,31 +364,31 @@ Cleanup: ## Version 2.4.1 -* to: ignore future records when to is reached +* `to`: ignore future records when to is reached ## Version 2.4.0 -* trim: after and before quote +* `trim`: after and before quote * tests: compatibility with Node.js 10 -* trim: handle quote followed by escape +* 
`trim`: handle quote followed by escape * parser: set nextChar to null instead of empty * travis: run against node 8 and 10 ## Version 2.3.0 -* cast: pass the header property -* auto_parse: deprecated message on tests -* cast: inject lines property +* `cast`: pass the header property +* `auto_parse`: deprecated message on tests +* `cast`: inject lines property ## Version 2.2.0 -* cast: deprecate auto_parse -* auto_parse: function get context as second argument +* `cast`: deprecate `auto_parse` +* `auto_parse`: function get context as second argument ## Version 2.1.0 -* skip_lines_with_error: DRYed implementation -* skip_lines_with_error: Go process the next line on error +* `skip_lines_with_error`: DRYed implementation +* `skip_lines_with_error`: Go process the next line on error * events: register and write not blocking * test: prefix names by group membership * events: emit record @@ -263,15 +436,15 @@ Irrelevant release, forgot to generate the coffee files. ## v1.3.0 -* options: auto_parse as a user function -* options: auto_parse_date as a user function +* options: `auto_parse` as a user function +* options: `auto_parse_date` as a user function * test: should require handled by mocha * package: coffeescript 2 and use semver tilde * options: ensure objectMode is cloned ## v1.2.4 -* relax_column_count: honors count while preserving skipped_line_count +* `relax_column_count`: honors count while preserving skipped_line_count * api: improve argument validation ## v1.2.3 @@ -296,7 +469,7 @@ Irrelevant release, forgot to generate the coffee files. ## v1.2.0 * skip default row delimiters when quoted #58 -* auto_parse: cleaner implementation +* `auto_parse`: cleaner implementation * src: isolate internal variables ## v1.1.12 @@ -305,7 +478,7 @@ Irrelevant release, forgot to generate the coffee files. 
## v1.1.11 -* rowDelimiters: fix all last month issues +* `rowDelimiters`: fix all last month issues ## v1.1.10 @@ -313,22 +486,22 @@ Irrelevant release, forgot to generate the coffee files. ## V1.1.9 -* rowDelimiter: simplification +* `rowDelimiter`: simplification * fix regression when trim and skip_empty_lines activated #122 -* auto_parse = simplify internal function +* `auto_parse` = simplify internal function ## V1.1.8 * src: trailing whitespace and empty headers #120 -* rowDelimiter: adding support for multiple row delimiters #119 -* Remove unnecessary argument: Parser.prototype.\__write #114 +* `rowDelimiter`: adding support for multiple row delimiters #119 +* Remove unnecessary argument: `Parser.prototype.__write` #114 ## v1.1.7 -* skip_lines_with_empty_values: support space and tabs #108 +* `skip_lines_with_empty_values`: support space and tabs #108 * test: remove coverage support * test: group by api, options and properties -* skip_lines_with_empty_values option +* `skip_lines_with_empty_values` option * write test illustrating column function throwing an error #98 * added ability to skip columns #50 @@ -339,17 +512,17 @@ Irrelevant release, forgot to generate the coffee files. ## v1.1.5 -* empty_line_count counter and renamed skipped to skipped_line_count +* `empty_line_count` counter and renamed skipped to `skipped_line_count` * skipped line count ## v1.1.4 -* avoid deoptimization due to wrong charAt index #103 +* avoid de-optimisation due to wrong charAt index #103 * parser writing before assigning listeners ## v1.1.3 -* column: stop on column count error #100 +* `columns`: stop on column count error #100 ## v1.1.2 @@ -361,17 +534,17 @@ Irrelevant release, forgot to generate the coffee files. 
* stream: call end if data instance of buffer * travis: add nodejs 6 -* columns: fix line error #97 +* `columns`: fix line error #97 ## v1.1.0 -* relax_column_count: default to false (strict) +* `relax_column_count`: default to false (strict) ## v1.0.6 -* relax_column_count: backward compatibility for 1.0.x -* relax_column_count: introduce new option -* columns: detect column length and fix lines count +* `relax_column_count`: backward compatibility for 1.0.x +* `relax_column_count`: introduce new option +* `columns`: detect column length and fix lines count ## v1.0.5 @@ -379,14 +552,14 @@ Irrelevant release, forgot to generate the coffee files. * add tests for inconsistent number of columns #73 * throw an error when a column is missing #73 * travis: test nodejs versions 4, 5 -* max_limit_on_data_read: new option +* `max_limit_on_data_read`: new option * removing the duplicate files in test and samples #86 * option argument to accept the number of bytes can be read #86 * avoid unwanted parsing when there is wrong delimiter or row delimiter #86 ## v1.0.4 -* sync: support objname +* sync: support `objname` ## v1.0.3 @@ -406,5 +579,5 @@ Irrelevant release, forgot to generate the coffee files. 
## v1.0.0 -* auto_parse: work on all fields, rename to “is_*” -* auto_parse: simplify test +* `auto_parse`: work on all fields, rename to “is_*” +* `auto_parse`: simplify test diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..81b217c --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to make participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. 
+ +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when +an individual is representing the project or its community in public spaces. +Examples of representing a project or community include using an official +project e-mail address, posting via an official social media account, or acting +as an appointed representative at an online or offline event. Representation of +a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at [INSERT EMAIL ADDRESS]. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. 
+ +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..410deab --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,9 @@ +## How to contribute to CoffeeScript + +* Before you open a ticket or send a pull request, [search](https://github.com/adaltas/node-csv-parse/issues) for previous discussions about the same feature or issue. Add to the earlier ticket if you find one. + +* Before sending a pull request for a feature, be sure to have [tests](https://github.com/adaltas/node-csv-parse/tree/master/test). + +* Use the same coding style as the rest of the [codebase](https://github.com/adaltas/node-csv-parse/tree/master/src). If you’re writting a test and if you're just getting started with CoffeeScript, there’s a nice [style guide](https://github.com/polarmobile/coffeescript-style-guide). + +* Documentation is published on [GitHub](https://github.com/adaltas/node-csv-docs) and you are invited to submit pull request with your changes. For conveniency, you can also browse the website and click on the edit link present at the top of every pages. diff --git a/README.md b/README.md index 6174d5c..f0ccd90 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ + +# CSV Parser for Node.js + [![Build Status](https://api.travis-ci.org/adaltas/node-csv-parse.svg)](https://travis-ci.org/#!/adaltas/node-csv-parse) Part of the [CSV module](https://csv.js.org/), this project is a parser converting CSV text input into arrays or objects. It implements the Node.js [`stream.Transform` API](http://nodejs.org/api/stream.html#stream_class_stream_transform). It also provides a simple callback-based API for convenience. 
It is both extremely easy to use and powerful. It was first released in 2010 and is used against big data sets by a large community. @@ -9,7 +12,7 @@ Part of the [CSV module](https://csv.js.org/), this project is a parser converti * [Options](http://csv.js.org/parse/options/) * [Info properties](http://csv.js.org/parse/info/) * [Common errors](http://csv.js.org/parse/errors/) -* [Examples](http://csv.js.org/parse/examples/) +* [Examples](http://csv.js.org/project/examples/) ## Features diff --git a/bench/async.iterator.js b/bench/async.iterator.js new file mode 100644 index 0000000..5f5fa91 --- /dev/null +++ b/bench/async.iterator.js @@ -0,0 +1,137 @@ + +const assert = require('assert') +const util = require('util') +const fs = require('fs') +const os = require('os') +const path = require('path') +const stream = require('stream') +const pad = require('pad') +const finished = util.promisify(stream.finished) +const parse = require('..') +const generate = require('csv-generate') +const NS_PER_SEC = 1e9 + +const write = async function(length, target){ + const writter = generate({ + length: length + }) + .pipe(fs.createWriteStream(target, { + highWaterMark: 64 * 64 * 1024 + })) + await finished(writter) +} + +const read = async function(length, source){ + let count = 0 + const parser = fs.createReadStream(source, { + highWaterMark: 64 * 64 * 1024 + }).pipe(parse()) + for await (const record of parser) { + count++ + } + assert.strictEqual(count, length) +} + +const dispose = async function(source){ + await fs.promises.unlink(source) +} + +const reporter = function(){ + const data = [] + return function(...info){ + if(info.length){ + data.push(info) + }else{ + return data + } + } +} + +const print = function(results){ + console.log('') + console.log([ + '|', + [ + pad(' length ', 10 + 2), + pad(' nanoseconds ', 15 + 2), + pad(' throughput ', 15 + 2), + ].join('|'), + '|', + ].join('')) + console.log([ + '|', + [ + '-'.repeat(12), + '-'.repeat(17), + '-'.repeat(17), + 
].join('|'), + '|', + ].join('')) + results.forEach( ([length, nanoseconds, throughput]) => { + console.log([ + '|', + [ + ` ${pad(`${length}`, 10)} `, + ` ${pad(`${nanoseconds}`, 15)} `, + ` ${pad(`${throughput}`, 15)} `, + ].join('|'), + '|', + ].join('')) + }) + console.log('') +} + +const main = async function(){ + const tests = [ + 20000, + 200000, + 2000000, + 20000000, + 200000000, + ].map( length => ({ + length: length, + target: path.join(os.tmpdir(), `data-${length}.csv`), + }) ) + const report = reporter() + await Promise.all( + tests.map(async function({length, target}){ + const time = process.hrtime() + await write(length, target) + const [seconds, nanoseconds] = process.hrtime(time) + console.log(`File ${target} created in ${seconds} seconds`) + }) + ) + await Promise.all( + await tests.map(async function({length, target}){ + const hrtime = process.hrtime() + await read(length, target) + const [seconds, hrtime_nanoseconds] = process.hrtime(hrtime) + const nanoseconds = seconds * NS_PER_SEC + hrtime_nanoseconds + const throughput = Math.round(length / nanoseconds * NS_PER_SEC) + console.log('Benchmark time:', `${nanoseconds} nanoseconds (${seconds} seconds)`) + console.log('Benchmark throughput:', Math.round(throughput), 'records per second') + report(length, nanoseconds, throughput) + }) + ) + await Promise.all( + await tests.map(async function({target}){ + await dispose(target) + }) + ) + results = report() + print(results) +} + +main() + +/* + +| length | nanoseconds | throughput | +|------------|-----------------|-----------------| +| 20000 | 983243192 | 20341 | +| 200000 | 3427937159 | 58344 | +| 2000000 | 23679366525 | 84462 | +| 20000000 | 178759143881 | 111882 | +| 200000000 | 979745580322 | 204135 | + +*/ diff --git a/lib/ResizeableBuffer.js b/lib/ResizeableBuffer.js index 91509ef..ce1021a 100644 --- a/lib/ResizeableBuffer.js +++ b/lib/ResizeableBuffer.js @@ -35,6 +35,9 @@ class ResizeableBuffer{ toString(){ return this.buf.slice(0, 
this.length).toString() } + toJSON(){ + return this.toString() + } reset(){ this.length = 0 } diff --git a/lib/es5/ResizeableBuffer.js b/lib/es5/ResizeableBuffer.js index 0273d0d..6381804 100644 --- a/lib/es5/ResizeableBuffer.js +++ b/lib/es5/ResizeableBuffer.js @@ -6,9 +6,7 @@ function _defineProperties(target, props) { for (var i = 0; i < props.length; i+ function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; } -var ResizeableBuffer = -/*#__PURE__*/ -function () { +var ResizeableBuffer = /*#__PURE__*/function () { function ResizeableBuffer() { var size = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 100; @@ -62,6 +60,11 @@ function () { value: function toString() { return this.buf.slice(0, this.length).toString(); } + }, { + key: "toJSON", + value: function toJSON() { + return this.toString(); + } }, { key: "reset", value: function reset() { diff --git a/lib/es5/index.d.ts b/lib/es5/index.d.ts index 8be0723..47eccaf 100644 --- a/lib/es5/index.d.ts +++ b/lib/es5/index.d.ts @@ -23,13 +23,13 @@ declare namespace parse { __write(chars: any, end: any, callback: any): any; - readonly options: Options; + readonly options: Options readonly info: Info; } interface CastingContext { - readonly column?: number | string; + readonly column: number | string; readonly empty_lines: number; readonly header: boolean; readonly index: number; @@ -51,11 +51,13 @@ declare namespace parse { * @deprecated Use {@link cast} */ auto_parse?: boolean | CastingFunction; + autoParse?: boolean | CastingFunction; /** * If true, the parser will attempt to convert read data types to dates. It requires the "auto_parse" option. 
* @deprecated Use {@link cast_date} */ auto_parse_date?: boolean | CastingDateFunction; + autoParseDate?: boolean | CastingDateFunction; /** * If true, detect and exclude the byte order mark (BOM) from the CSV input if present. */ @@ -70,6 +72,7 @@ declare namespace parse { * If a function, receive the value as argument and return a new value. It requires the "auto_parse" option. Be careful, it relies on Date.parse. */ cast_date?: boolean | CastingDateFunction; + castDate?: boolean | CastingDateFunction; /** * List of fields as an array, * a user defined callback accepting the first line and returning the column names or true if autodiscovered in the first CSV line, @@ -77,6 +80,12 @@ declare namespace parse { * affect the result data set in the sense that records will be objects instead of arrays. */ columns?: ColumnOption[] | boolean | ((record: any) => ColumnOption[]); + /** + * Convert values into an array of values when columns are activated and + * when multiple columns of the same name are found. + */ + columns_duplicates_to_array?: boolean; + columnsDuplicatesToArray?: boolean; /** * Treat all the characters after this one as a comment, default to '' (disabled). */ @@ -84,7 +93,7 @@ declare namespace parse { /** * Set the field delimiter. One character only, defaults to comma. */ - delimiter?: string | Buffer; + delimiter?: string | string[] | Buffer; /** * Set the escape character, one character only, defaults to double quotes. */ @@ -97,6 +106,7 @@ declare namespace parse { * Start handling records from the requested line number. */ from_line?: number; + fromLine?: number; /** * Generate two properties `info` and `record` where `info` is a snapshot of the info object at the time the record was created and `record` is the parsed array or object. */ @@ -112,14 +122,20 @@ declare namespace parse { * default to 128000 characters. */ max_record_size?: number; + maxRecordSize?: number; /** * Name of header-record title to name objects by. 
*/ objname?: string; + /** + * Alter and filter records by executing a user defined function. + */ + on_record?: (record: any, context: CastingContext) => any; + onRecord?: (record: any, context: CastingContext) => any; /** * Optional character surrounding a field, one character only, defaults to double quotes. */ - quote?: string | boolean | Buffer; + quote?: string | boolean | Buffer | null; /** * Generate two properties raw and row where raw is the original CSV row content and row is the parsed array or object. */ @@ -132,11 +148,23 @@ declare namespace parse { * Discard inconsistent columns count, default to false. */ relax_column_count?: boolean; + relaxColumnCount?: boolean; + /** + * Discard inconsistent columns count when the record contains less fields than expected, default to false. + */ + relax_column_count_less?: boolean; + relaxColumnCountLess?: boolean; + /** + * Discard inconsistent columns count when the record contains more fields than expected, default to false. + */ + relax_column_count_more?: boolean; + relaxColumnCountMore?: boolean; /** * One or multiple characters used to delimit record rows; defaults to auto discovery if not provided. * Supported auto discovery method are Linux ("\n"), Apple ("\r") and Windows ("\r\n") row delimiters. */ record_delimiter?: string | string[] | Buffer | Buffer[]; + recordDelimiter?: string | string[] | Buffer | Buffer[]; /** * If true, ignore whitespace immediately preceding the delimiter (i.e. right-trim all fields), defaults to false. * Does not remove whitespace in a quoted field. @@ -147,14 +175,17 @@ declare namespace parse { * Defaults to false */ skip_empty_lines?: boolean; + skipEmptyLines?: boolean; /** * Skip a line with error found inside and directly go process the next line. */ skip_lines_with_error?: boolean; + skipLinesWithError?: boolean; /** * Don't generate records for lines containing empty column values (column matching /\s*\/), defaults to false. 
*/ skip_lines_with_empty_values?: boolean; + skipLinesWithEmptyValues?: boolean; /** * Stop handling records after the requested number of records. */ @@ -163,6 +194,7 @@ declare namespace parse { * Stop handling records after the requested line number. */ to_line?: number; + toLine?: number; /** * If true, ignore whitespace immediately around the delimiter, defaults to false. * Does not remove whitespace in a quoted field. @@ -192,4 +224,32 @@ declare namespace parse { */ readonly invalid_field_length: number; } + + class CsvError extends Error { + readonly code: CsvErrorCode; + [key: string]: any; + + constructor(code: CsvErrorCode, message: string | string[], ...contexts: any[]); + } + + type CsvErrorCode = + 'CSV_INVALID_OPTION_BOM' + | 'CSV_INVALID_OPTION_CAST' + | 'CSV_INVALID_OPTION_CAST_DATE' + | 'CSV_INVALID_OPTION_COLUMNS' + | 'CSV_INVALID_OPTION_COLUMNS_DUPLICATES_TO_ARRAY' + | 'CSV_INVALID_OPTION_COMMENT' + | 'CSV_INVALID_OPTION_DELIMITER' + | 'CSV_INVALID_OPTION_ON_RECORD' + | 'CSV_INVALID_CLOSING_QUOTE' + | 'INVALID_OPENING_QUOTE' + | 'CSV_INVALID_COLUMN_MAPPING' + | 'CSV_INVALID_ARGUMENT' + | 'CSV_INVALID_COLUMN_DEFINITION' + | 'CSV_MAX_RECORD_SIZE' + | 'CSV_NON_TRIMABLE_CHAR_AFTER_CLOSING_QUOTE' + | 'CSV_QUOTE_NOT_CLOSED' + | 'CSV_INCONSISTENT_RECORD_LENGTH' + | 'CSV_RECORD_DONT_MATCH_COLUMNS_LENGTH' + | 'CSV_OPTION_COLUMNS_MISSING_NAME' } diff --git a/lib/es5/index.js b/lib/es5/index.js index 230d3ce..df3d5a2 100644 --- a/lib/es5/index.js +++ b/lib/es5/index.js @@ -1,26 +1,36 @@ "use strict"; -function _typeof(obj) { if (typeof Symbol === "function" && typeof Symbol.iterator === "symbol") { _typeof = function _typeof(obj) { return typeof obj; }; } else { _typeof = function _typeof(obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; } return _typeof(obj); } +function _wrapNativeSuper(Class) { var _cache = typeof Map === "function" ? 
new Map() : undefined; _wrapNativeSuper = function _wrapNativeSuper(Class) { if (Class === null || !_isNativeFunction(Class)) return Class; if (typeof Class !== "function") { throw new TypeError("Super expression must either be null or a function"); } if (typeof _cache !== "undefined") { if (_cache.has(Class)) return _cache.get(Class); _cache.set(Class, Wrapper); } function Wrapper() { return _construct(Class, arguments, _getPrototypeOf(this).constructor); } Wrapper.prototype = Object.create(Class.prototype, { constructor: { value: Wrapper, enumerable: false, writable: true, configurable: true } }); return _setPrototypeOf(Wrapper, Class); }; return _wrapNativeSuper(Class); } -function _slicedToArray(arr, i) { return _arrayWithHoles(arr) || _iterableToArrayLimit(arr, i) || _nonIterableRest(); } +function _construct(Parent, args, Class) { if (_isNativeReflectConstruct()) { _construct = Reflect.construct; } else { _construct = function _construct(Parent, args, Class) { var a = [null]; a.push.apply(a, args); var Constructor = Function.bind.apply(Parent, a); var instance = new Constructor(); if (Class) _setPrototypeOf(instance, Class.prototype); return instance; }; } return _construct.apply(null, arguments); } -function _nonIterableRest() { throw new TypeError("Invalid attempt to destructure non-iterable instance"); } +function _isNativeFunction(fn) { return Function.toString.call(fn).indexOf("[native code]") !== -1; } -function _iterableToArrayLimit(arr, i) { var _arr = []; var _n = true; var _d = false; var _e = undefined; try { for (var _i = arr[Symbol.iterator](), _s; !(_n = (_s = _i.next()).done); _n = true) { _arr.push(_s.value); if (i && _arr.length === i) break; } } catch (err) { _d = true; _e = err; } finally { try { if (!_n && _i["return"] != null) _i["return"](); } finally { if (_d) throw _e; } } return _arr; } +function _typeof(obj) { "@babel/helpers - typeof"; if (typeof Symbol === "function" && typeof Symbol.iterator === "symbol") { _typeof = function 
_typeof(obj) { return typeof obj; }; } else { _typeof = function _typeof(obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; } return _typeof(obj); } + +function _slicedToArray(arr, i) { return _arrayWithHoles(arr) || _iterableToArrayLimit(arr, i) || _unsupportedIterableToArray(arr, i) || _nonIterableRest(); } + +function _nonIterableRest() { throw new TypeError("Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } + +function _iterableToArrayLimit(arr, i) { if (typeof Symbol === "undefined" || !(Symbol.iterator in Object(arr))) return; var _arr = []; var _n = true; var _d = false; var _e = undefined; try { for (var _i = arr[Symbol.iterator](), _s; !(_n = (_s = _i.next()).done); _n = true) { _arr.push(_s.value); if (i && _arr.length === i) break; } } catch (err) { _d = true; _e = err; } finally { try { if (!_n && _i["return"] != null) _i["return"](); } finally { if (_d) throw _e; } } return _arr; } function _arrayWithHoles(arr) { if (Array.isArray(arr)) return arr; } -function _toConsumableArray(arr) { return _arrayWithoutHoles(arr) || _iterableToArray(arr) || _nonIterableSpread(); } +function _toConsumableArray(arr) { return _arrayWithoutHoles(arr) || _iterableToArray(arr) || _unsupportedIterableToArray(arr) || _nonIterableSpread(); } + +function _nonIterableSpread() { throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } + +function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || 
/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); } -function _nonIterableSpread() { throw new TypeError("Invalid attempt to spread non-iterable instance"); } +function _iterableToArray(iter) { if (typeof Symbol !== "undefined" && Symbol.iterator in Object(iter)) return Array.from(iter); } -function _iterableToArray(iter) { if (Symbol.iterator in Object(iter) || Object.prototype.toString.call(iter) === "[object Arguments]") return Array.from(iter); } +function _arrayWithoutHoles(arr) { if (Array.isArray(arr)) return _arrayLikeToArray(arr); } -function _arrayWithoutHoles(arr) { if (Array.isArray(arr)) { for (var i = 0, arr2 = new Array(arr.length); i < arr.length; i++) { arr2[i] = arr[i]; } return arr2; } } +function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; } function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); if (enumerableOnly) symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; }); keys.push.apply(keys, symbols); } return keys; } -function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { ownKeys(source, true).forEach(function (key) { _defineProperty(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(source).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; } +function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? 
arguments[i] : {}; if (i % 2) { ownKeys(Object(source), true).forEach(function (key) { _defineProperty(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; } function _defineProperty(obj, key, value) { if (key in obj) { Object.defineProperty(obj, key, { value: value, enumerable: true, configurable: true, writable: true }); } else { obj[key] = value; } return obj; } @@ -30,38 +40,43 @@ function _defineProperties(target, props) { for (var i = 0; i < props.length; i+ function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; } +function _inherits(subClass, superClass) { if (typeof superClass !== "function" && superClass !== null) { throw new TypeError("Super expression must either be null or a function"); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, writable: true, configurable: true } }); if (superClass) _setPrototypeOf(subClass, superClass); } + +function _setPrototypeOf(o, p) { _setPrototypeOf = Object.setPrototypeOf || function _setPrototypeOf(o, p) { o.__proto__ = p; return o; }; return _setPrototypeOf(o, p); } + +function _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = _getPrototypeOf(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = _getPrototypeOf(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return _possibleConstructorReturn(this, result); }; } + function _possibleConstructorReturn(self, call) { if (call 
&& (_typeof(call) === "object" || typeof call === "function")) { return call; } return _assertThisInitialized(self); } function _assertThisInitialized(self) { if (self === void 0) { throw new ReferenceError("this hasn't been initialised - super() hasn't been called"); } return self; } -function _getPrototypeOf(o) { _getPrototypeOf = Object.setPrototypeOf ? Object.getPrototypeOf : function _getPrototypeOf(o) { return o.__proto__ || Object.getPrototypeOf(o); }; return _getPrototypeOf(o); } - -function _inherits(subClass, superClass) { if (typeof superClass !== "function" && superClass !== null) { throw new TypeError("Super expression must either be null or a function"); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, writable: true, configurable: true } }); if (superClass) _setPrototypeOf(subClass, superClass); } +function _isNativeReflectConstruct() { if (typeof Reflect === "undefined" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === "function") return true; try { Date.prototype.toString.call(Reflect.construct(Date, [], function () {})); return true; } catch (e) { return false; } } -function _setPrototypeOf(o, p) { _setPrototypeOf = Object.setPrototypeOf || function _setPrototypeOf(o, p) { o.__proto__ = p; return o; }; return _setPrototypeOf(o, p); } +function _getPrototypeOf(o) { _getPrototypeOf = Object.setPrototypeOf ? Object.getPrototypeOf : function _getPrototypeOf(o) { return o.__proto__ || Object.getPrototypeOf(o); }; return _getPrototypeOf(o); } /* CSV Parse -Please look at the [project documentation](https://csv.js.org/parse/) for additional -information. +Please look at the [project documentation](https://csv.js.org/parse/) for +additional information. 
*/ var _require = require('stream'), Transform = _require.Transform; var ResizeableBuffer = require('./ResizeableBuffer'); -var cr = 13; +var tab = 9; var nl = 10; +var np = 12; +var cr = 13; var space = 32; -var tab = 9; var bom_utf8 = Buffer.from([239, 187, 191]); -var Parser = -/*#__PURE__*/ -function (_Transform) { +var Parser = /*#__PURE__*/function (_Transform) { _inherits(Parser, _Transform); + var _super = _createSuper(Parser); + function Parser() { var _this; @@ -69,9 +84,9 @@ function (_Transform) { _classCallCheck(this, Parser); - _this = _possibleConstructorReturn(this, _getPrototypeOf(Parser).call(this, _objectSpread({}, { + _this = _super.call(this, _objectSpread(_objectSpread({}, { readableObjectMode: true - }, {}, opts))); + }), opts)); var options = {}; // Merge with user options for (var opt in opts) { @@ -82,7 +97,7 @@ function (_Transform) { if (options.bom === undefined || options.bom === null || options.bom === false) { options.bom = false; } else if (options.bom !== true) { - throw new Error("Invalid Option: bom must be true, got ".concat(JSON.stringify(options.bom))); + throw new CsvError('CSV_INVALID_OPTION_BOM', ['Invalid option bom:', 'bom must be true,', "got ".concat(JSON.stringify(options.bom))]); } // Normalize option `cast` @@ -94,7 +109,7 @@ function (_Transform) { fnCastField = options.cast; options.cast = true; } else if (options.cast !== true) { - throw new Error('Invalid Option: cast must be true or a function'); + throw new CsvError('CSV_INVALID_OPTION_CAST', ['Invalid option cast:', 'cast must be true or a function,', "got ".concat(JSON.stringify(options.cast))]); } // Normalize option `cast_date` @@ -106,7 +121,7 @@ function (_Transform) { return !isNaN(date) ? 
new Date(date) : value; }; } else if (typeof options.cast_date !== 'function') { - throw new Error('Invalid Option: cast_date must be true or a function'); + throw new CsvError('CSV_INVALID_OPTION_CAST_DATE', ['Invalid option cast_date:', 'cast_date must be true or a function,', "got ".concat(JSON.stringify(options.cast_date))]); } // Normalize option `columns` @@ -123,7 +138,14 @@ function (_Transform) { } else if (options.columns === undefined || options.columns === null || options.columns === false) { options.columns = false; } else { - throw new Error("Invalid Option columns: expect an object or true, got ".concat(JSON.stringify(options.columns))); + throw new CsvError('CSV_INVALID_OPTION_COLUMNS', ['Invalid option columns:', 'expect an object, a function or true,', "got ".concat(JSON.stringify(options.columns))]); + } // Normalize option `columns_duplicates_to_array` + + + if (options.columns_duplicates_to_array === undefined || options.columns_duplicates_to_array === null || options.columns_duplicates_to_array === false) { + options.columns_duplicates_to_array = false; + } else if (options.columns_duplicates_to_array !== true) { + throw new CsvError('CSV_INVALID_OPTION_COLUMNS_DUPLICATES_TO_ARRAY', ['Invalid option columns_duplicates_to_array:', 'expect an boolean,', "got ".concat(JSON.stringify(options.columns_duplicates_to_array))]); } // Normalize option `comment` @@ -135,41 +157,50 @@ function (_Transform) { } if (!Buffer.isBuffer(options.comment)) { - throw new Error("Invalid Option: comment must be a buffer or a string, got ".concat(JSON.stringify(options.comment))); + throw new CsvError('CSV_INVALID_OPTION_COMMENT', ['Invalid option comment:', 'comment must be a buffer or a string,', "got ".concat(JSON.stringify(options.comment))]); } } // Normalize option `delimiter` - if (options.delimiter === undefined || options.delimiter === null || options.delimiter === false) { - options.delimiter = Buffer.from(','); - } else if 
(Buffer.isBuffer(options.delimiter)) { - if (options.delimiter.length === 0) { - throw new Error("Invalid Option: delimiter must be a non empty buffer"); - } // Great, nothing to do + var delimiter_json = JSON.stringify(options.delimiter); + if (!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter]; - } else if (typeof options.delimiter === 'string') { - if (options.delimiter.length === 0) { - throw new Error("Invalid Option: delimiter must be a non empty string"); + if (options.delimiter.length === 0) { + throw new CsvError('CSV_INVALID_OPTION_DELIMITER', ['Invalid option delimiter:', 'delimiter must be a non empty string or buffer or array of string|buffer,', "got ".concat(delimiter_json)]); + } + + options.delimiter = options.delimiter.map(function (delimiter) { + if (delimiter === undefined || delimiter === null || delimiter === false) { + return Buffer.from(','); } - options.delimiter = Buffer.from(options.delimiter); - } else { - throw new Error("Invalid Option: delimiter must be a string or a buffer, got ".concat(options.delimiter)); - } // Normalize option `escape` + if (typeof delimiter === 'string') { + delimiter = Buffer.from(delimiter); + } + + if (!Buffer.isBuffer(delimiter) || delimiter.length === 0) { + throw new CsvError('CSV_INVALID_OPTION_DELIMITER', ['Invalid option delimiter:', 'delimiter must be a non empty string or buffer or array of string|buffer,', "got ".concat(delimiter_json)]); + } + return delimiter; + }); // Normalize option `escape` - if (options.escape === undefined || options.escape === null) { + if (options.escape === undefined || options.escape === true) { options.escape = Buffer.from('"'); } else if (typeof options.escape === 'string') { options.escape = Buffer.from(options.escape); + } else if (options.escape === null || options.escape === false) { + options.escape = null; } - if (!Buffer.isBuffer(options.escape)) { - throw new Error("Invalid Option: escape must be a buffer or a string, got 
".concat(JSON.stringify(options.escape))); - } else if (options.escape.length !== 1) { - throw new Error("Invalid Option Length: escape must be one character, got ".concat(options.escape.length)); - } else { - options.escape = options.escape[0]; + if (options.escape !== null) { + if (!Buffer.isBuffer(options.escape)) { + throw new Error("Invalid Option: escape must be a buffer, a string or a boolean, got ".concat(JSON.stringify(options.escape))); + } else if (options.escape.length !== 1) { + throw new Error("Invalid Option Length: escape must be one character, got ".concat(options.escape.length)); + } else { + options.escape = options.escape[0]; + } } // Normalize option `from` @@ -239,6 +270,13 @@ function (_Transform) { } else { throw new Error("Invalid Option: objname must be a string or a buffer, got ".concat(options.objname)); + } // Normalize option `on_record` + + + if (options.on_record === undefined || options.on_record === null) { + options.on_record = undefined; + } else if (typeof options.on_record !== 'function') { + throw new CsvError('CSV_INVALID_OPTION_ON_RECORD', ['Invalid option `on_record`:', 'expect a function,', "got ".concat(JSON.stringify(options.on_record))]); } // Normalize option `quote` @@ -295,6 +333,20 @@ function (_Transform) { options.relax_column_count = false; } else { throw new Error("Invalid Option: relax_column_count must be a boolean, got ".concat(JSON.stringify(options.relax_column_count))); + } + + if (typeof options.relax_column_count_less === 'boolean') {// Great, nothing to do + } else if (options.relax_column_count_less === undefined || options.relax_column_count_less === null) { + options.relax_column_count_less = false; + } else { + throw new Error("Invalid Option: relax_column_count_less must be a boolean, got ".concat(JSON.stringify(options.relax_column_count_less))); + } + + if (typeof options.relax_column_count_more === 'boolean') {// Great, nothing to do + } else if (options.relax_column_count_more === undefined || 
options.relax_column_count_more === null) { + options.relax_column_count_more = false; + } else { + throw new Error("Invalid Option: relax_column_count_more must be a boolean, got ".concat(JSON.stringify(options.relax_column_count_more))); } // Normalize option `skip_empty_lines` @@ -461,7 +513,6 @@ function (_Transform) { bom = _this$options.bom, comment = _this$options.comment, escape = _this$options.escape, - from = _this$options.from, from_line = _this$options.from_line, info = _this$options.info, ltrim = _this$options.ltrim, @@ -478,8 +529,7 @@ function (_Transform) { bomSkipped = _this$state.bomSkipped, previousBuf = _this$state.previousBuf, rawBuffer = _this$state.rawBuffer, - escapeIsQuote = _this$state.escapeIsQuote, - trimChars = _this$state.trimChars; + escapeIsQuote = _this$state.escapeIsQuote; var buf; if (previousBuf === undefined) { @@ -569,8 +619,8 @@ function (_Transform) { this.state.escaping = false; } else { // Escape is only active inside quoted fields - if (this.state.quoting === true && chr === escape && pos + 1 < bufLen) { - // We are quoting, the char is an escape chr and there is a chr to escape + // We are quoting, the char is an escape chr and there is a chr to escape + if (escape !== null && this.state.quoting === true && chr === escape && pos + 1 < bufLen) { if (escapeIsQuote) { if (buf[pos + 1] === quote) { this.state.escaping = true; @@ -599,14 +649,14 @@ function (_Transform) { // Treat next char as a regular character // TODO: need to compare bytes instead of single char - if (chr === escape && nextChr === quote) { + if (escape !== null && chr === escape && nextChr === quote) { pos++; } else if (!nextChr || isNextChrDelimiter || isNextChrRowDelimiter || isNextChrComment || isNextChrTrimable) { this.state.quoting = false; this.state.wasQuoting = true; continue; } else if (relax === false) { - var err = this.__error("Invalid Closing Quote: got \"".concat(String.fromCharCode(nextChr), "\" at line ").concat(this.info.lines, " instead 
of delimiter, row delimiter, trimable character (if activated) or comment")); + var err = this.__error(new CsvError('CSV_INVALID_CLOSING_QUOTE', ['Invalid Closing Quote:', "got \"".concat(String.fromCharCode(nextChr), "\""), "at line ".concat(this.info.lines), 'instead of delimiter, row delimiter, trimable character', '(if activated) or comment'], this.__context())); if (err !== undefined) return err; } else { @@ -619,7 +669,9 @@ function (_Transform) { if (this.state.field.length !== 0) { // In relax mode, treat opening quote preceded by chrs as regular if (relax === false) { - var _err = this.__error("Invalid opening quote at line ".concat(this.info.lines)); + var _err = this.__error(new CsvError('INVALID_OPENING_QUOTE', ['Invalid Opening Quote:', "a quote is found inside a field at line ".concat(this.info.lines)], this.__context(), { + field: this.state.field + })); if (_err !== undefined) return _err; } @@ -704,7 +756,7 @@ function (_Transform) { if (this.state.commenting === false) { if (max_record_size !== 0 && this.state.record_length + this.state.field.length > max_record_size) { - var _err2 = this.__error("Max Record Size: record exceed the maximum number of tolerated bytes of ".concat(max_record_size, " on line ").concat(this.info.lines)); + var _err2 = this.__error(new CsvError('CSV_MAX_RECORD_SIZE', ['Max Record Size:', 'record exceed the maximum number of tolerated bytes', "of ".concat(max_record_size), "at line ".concat(this.info.lines)], this.__context())); if (_err2 !== undefined) return _err2; } @@ -717,15 +769,16 @@ function (_Transform) { if (lappend === true && rappend === true) { this.state.field.append(chr); } else if (rtrim === true && !this.__isCharTrimable(chr)) { - var _err3 = this.__error("Invalid Closing Quote: found non trimable byte after quote at line ".concat(this.info.lines)); + var _err3 = this.__error(new CsvError('CSV_NON_TRIMABLE_CHAR_AFTER_CLOSING_QUOTE', ['Invalid Closing Quote:', 'found non trimable byte after quote', "at 
line ".concat(this.info.lines)], this.__context())); if (_err3 !== undefined) return _err3; } } if (end === true) { + // Ensure we are not ending in a quoting state if (this.state.quoting === true) { - var _err4 = this.__error("Invalid Closing Quote: quote is not closed at line ".concat(this.info.lines)); + var _err4 = this.__error(new CsvError('CSV_QUOTE_NOT_CLOSED', ['Quote Not Closed:', "the parsing is finished with an opening quote at line ".concat(this.info.lines)], this.__context())); if (_err4 !== undefined) return _err4; } else { @@ -757,56 +810,70 @@ function (_Transform) { }, { key: "__isCharTrimable", value: function __isCharTrimable(chr) { - return chr === space || chr === tab || chr === cr || chr === nl; + return chr === space || chr === tab || chr === cr || chr === nl || chr === np; } }, { key: "__onRow", value: function __onRow() { var _this$options2 = this.options, columns = _this$options2.columns, + columns_duplicates_to_array = _this$options2.columns_duplicates_to_array, info = _this$options2.info, from = _this$options2.from, relax_column_count = _this$options2.relax_column_count, + relax_column_count_less = _this$options2.relax_column_count_less, + relax_column_count_more = _this$options2.relax_column_count_more, raw = _this$options2.raw, skip_lines_with_empty_values = _this$options2.skip_lines_with_empty_values; var _this$state2 = this.state, enabled = _this$state2.enabled, - record = _this$state2.record; // Convert the first line into column names + record = _this$state2.record; + + if (enabled === false) { + return this.__resetRow(); + } // Convert the first line into column names + + + var recordLength = record.length; if (columns === true) { + if (isRecordEmpty(record)) { + this.__resetRow(); + + return; + } + return this.__firstLineToColumns(record); } - var recordLength = record.length; - if (columns === false && this.info.records === 0) { this.state.expectedRecordLength = recordLength; - } else if (enabled === true) { - if (recordLength 
!== this.state.expectedRecordLength) { - if (relax_column_count === true) { - this.info.invalid_field_length++; - } else { - if (columns === false) { - var err = this.__error("Invalid Record Length: expect ".concat(this.state.expectedRecordLength, ", got ").concat(recordLength, " on line ").concat(this.info.lines)); + } - if (err !== undefined) return err; - } else { - var _err5 = this.__error("Invalid Record Length: header length is ".concat(columns.length, ", got ").concat(recordLength, " on line ").concat(this.info.lines)); + if (recordLength !== this.state.expectedRecordLength) { + if (relax_column_count === true || relax_column_count_less === true && recordLength < this.state.expectedRecordLength || relax_column_count_more === true && recordLength > this.state.expectedRecordLength) { + this.info.invalid_field_length++; + } else { + if (columns === false) { + var err = this.__error(new CsvError('CSV_INCONSISTENT_RECORD_LENGTH', ['Invalid Record Length:', "expect ".concat(this.state.expectedRecordLength, ","), "got ".concat(recordLength, " on line ").concat(this.info.lines)], this.__context(), { + record: record + })); - if (_err5 !== undefined) return _err5; - } + if (err !== undefined) return err; + } else { + var _err5 = this.__error( // CSV_INVALID_RECORD_LENGTH_DONT_MATCH_COLUMNS + new CsvError('CSV_RECORD_DONT_MATCH_COLUMNS_LENGTH', ['Invalid Record Length:', "columns length is ".concat(columns.length, ","), // rename columns + "got ".concat(recordLength, " on line ").concat(this.info.lines)], this.__context(), { + record: record + })); + + if (_err5 !== undefined) return _err5; } } } - if (enabled === false) { - return this.__resetRow(); - } - if (skip_lines_with_empty_values === true) { - if (record.map(function (field) { - return field.trim(); - }).join('') === '') { + if (isRecordEmpty(record)) { this.__resetRow(); return; @@ -826,49 +893,83 @@ function (_Transform) { if (columns !== false) { var obj = {}; // Transform record array to an object - for 
(var i in record) { - if (columns[i] === undefined || columns[i].disabled) continue; - obj[columns[i].name] = record[i]; + for (var i = 0, l = record.length; i < l; i++) { + if (columns[i] === undefined || columns[i].disabled) continue; // obj[columns[i].name] = record[i] + // Turn duplicate columns into an array + + if (columns_duplicates_to_array === true && obj[columns[i].name]) { + if (Array.isArray(obj[columns[i].name])) { + obj[columns[i].name] = obj[columns[i].name].concat(record[i]); + } else { + obj[columns[i].name] = [obj[columns[i].name], record[i]]; + } + } else { + obj[columns[i].name] = record[i]; + } } var objname = this.options.objname; if (objname === undefined) { if (raw === true || info === true) { - this.push(Object.assign({ + var _err6 = this.__push(Object.assign({ record: obj }, raw === true ? { raw: this.state.rawBuffer.toString() } : {}, info === true ? { info: this.state.info } : {})); + + if (_err6) { + return _err6; + } } else { - this.push(obj); + var _err7 = this.__push(obj); + + if (_err7) { + return _err7; + } } } else { if (raw === true || info === true) { - this.push(Object.assign({ + var _err8 = this.__push(Object.assign({ record: [obj[objname], obj] }, raw === true ? { raw: this.state.rawBuffer.toString() } : {}, info === true ? { info: this.state.info } : {})); + + if (_err8) { + return _err8; + } } else { - this.push([obj[objname], obj]); + var _err9 = this.__push([obj[objname], obj]); + + if (_err9) { + return _err9; + } } } } else { if (raw === true || info === true) { - this.push(Object.assign({ + var _err10 = this.__push(Object.assign({ record: record }, raw === true ? { raw: this.state.rawBuffer.toString() } : {}, info === true ? 
{ info: this.state.info } : {})); + + if (_err10) { + return _err10; + } } else { - this.push(record); + var _err11 = this.__push(record); + + if (_err11) { + return _err11; + } } } } @@ -881,11 +982,12 @@ function (_Transform) { var firstLineToHeaders = this.state.firstLineToHeaders; try { - // record = record.filter(function(field){ return field !== undefined}) var headers = firstLineToHeaders === undefined ? record : firstLineToHeaders.call(null, record); if (!Array.isArray(headers)) { - return this.__error("Invalid Header Mapping: expect an array, got ".concat(JSON.stringify(headers))); + return this.__error(new CsvError('CSV_INVALID_COLUMN_MAPPING', ['Invalid Column Mapping:', 'expect an array from column function,', "got ".concat(JSON.stringify(headers))], this.__context(), { + headers: headers + })); } var normalizedHeaders = normalizeColumnsArray(headers); @@ -902,8 +1004,6 @@ function (_Transform) { }, { key: "__resetRow", value: function __resetRow() { - var info = this.options.info; - if (this.options.raw === true) { this.state.rawBuffer.reset(); } @@ -920,9 +1020,10 @@ function (_Transform) { max_record_size = _this$options3.max_record_size; var _this$state3 = this.state, enabled = _this$state3.enabled, - wasQuoting = _this$state3.wasQuoting; // Deal with from_to options + wasQuoting = _this$state3.wasQuoting; // Short circuit for the from_line options - if (this.options.columns !== true && enabled === false) { + if (enabled === false) { + /* this.options.columns !== true && */ return this.__resetField(); } @@ -955,27 +1056,44 @@ function (_Transform) { value: function __resetField() { this.state.field.reset(); this.state.wasQuoting = false; + } + }, { + key: "__push", + value: function __push(record) { + var on_record = this.options.on_record; + + if (on_record !== undefined) { + var context = this.__context(); + + try { + record = on_record.call(null, record, context); + } catch (err) { + return err; + } + + if (record === undefined || record === 
null) { + return; + } + } + + this.push(record); } // Return a tuple with the error and the casted value }, { key: "__cast", value: function __cast(field) { - var isColumns = Array.isArray(this.options.columns); // Dont loose time calling cast if the field wont be part of the final record + var _this$options4 = this.options, + columns = _this$options4.columns, + relax_column_count = _this$options4.relax_column_count; + var isColumns = Array.isArray(columns); // Dont loose time calling cast + // because the final record is an object + // and this field can't be associated to a key present in columns - if (isColumns === true && this.options.columns.length <= this.state.record.length) { + if (isColumns === true && relax_column_count && this.options.columns.length <= this.state.record.length) { return [undefined, undefined]; } - var context = { - column: isColumns === true ? this.options.columns[this.state.record.length].name : this.state.record.length, - empty_lines: this.info.empty_lines, - header: this.options.columns === true, - index: this.state.record.length, - invalid_field_length: this.info.invalid_field_length, - quoting: this.state.wasQuoting, - lines: this.info.lines, - records: this.info.records - }; + var context = this.__context(); if (this.state.castField !== null) { try { @@ -985,21 +1103,20 @@ function (_Transform) { } } - if (this.__isInt(field) === true) { - return [undefined, parseInt(field)]; - } else if (this.__isFloat(field)) { + if (this.__isFloat(field)) { return [undefined, parseFloat(field)]; } else if (this.options.cast_date !== false) { return [undefined, this.options.cast_date.call(null, field, context)]; } return [undefined, field]; - } - }, { - key: "__isInt", - value: function __isInt(value) { - return /^(\-|\+)?([1-9]+[0-9]*)$/.test(value); - } + } // Keep it in case we implement the `cast_int` option + // __isInt(value){ + // // return Number.isInteger(parseInt(value)) + // // return !isNaN( parseInt( obj ) ); + // return 
/^(\-|\+)?[1-9][0-9]*$/.test(value) + // } + }, { key: "__isFloat", value: function __isFloat(value) { @@ -1024,10 +1141,9 @@ function (_Transform) { return false; } - var _this$options4 = this.options, - comment = _this$options4.comment, - delimiter = _this$options4.delimiter, - escape = _this$options4.escape; + var _this$options5 = this.options, + comment = _this$options5.comment, + delimiter = _this$options5.delimiter; var _this$state4 = this.state, quoting = _this$state4.quoting, recordDelimiterMaxLength = _this$state4.recordDelimiterMaxLength; @@ -1046,14 +1162,20 @@ function (_Transform) { key: "__isDelimiter", value: function __isDelimiter(chr, buf, pos) { var delimiter = this.options.delimiter; - var delLength = delimiter.length; - if (delimiter[0] !== chr) return 0; - for (var i = 1; i < delLength; i++) { - if (delimiter[i] !== buf[pos + i]) return 0; + loop1: for (var i = 0; i < delimiter.length; i++) { + var del = delimiter[i]; + + if (del[0] === chr) { + for (var j = 1; j < del.length; j++) { + if (del[j] !== buf[pos + j]) continue loop1; + } + + return del.length; + } } - return delimiter.length; + return 0; } }, { key: "__isRecordDelimiter", @@ -1107,7 +1229,7 @@ function (_Transform) { key: "__error", value: function __error(msg) { var skip_lines_with_error = this.options.skip_lines_with_error; - var err = new Error(msg); + var err = typeof msg === 'string' ? new Error(msg) : msg; if (skip_lines_with_error) { this.state.recordHasError = true; @@ -1117,6 +1239,22 @@ function (_Transform) { return err; } } + }, { + key: "__context", + value: function __context() { + var columns = this.options.columns; + var isColumns = Array.isArray(columns); + return { + column: isColumns === true ? columns.length > this.state.record.length ? 
columns[this.state.record.length].name : null : this.state.record.length, + empty_lines: this.info.empty_lines, + header: columns === true, + index: this.state.record.length, + invalid_field_length: this.info.invalid_field_length, + quoting: this.state.wasQuoting, + lines: this.info.lines, + records: this.info.records + }; + } }]); return Parser; @@ -1137,7 +1275,7 @@ var parse = function parse() { } else if (callback === undefined && type === 'function') { callback = argument; } else { - throw new Error("Invalid argument: got ".concat(JSON.stringify(argument), " at index ").concat(i)); + throw new CsvError('CSV_INVALID_ARGUMENT', ['Invalid argument:', "got ".concat(JSON.stringify(argument), " at index ").concat(i)]); } } @@ -1148,7 +1286,7 @@ var parse = function parse() { parser.on('readable', function () { var record; - while (record = this.read()) { + while ((record = this.read()) !== null) { if (options === undefined || options.objname === undefined) { records.push(record); } else { @@ -1165,18 +1303,65 @@ var parse = function parse() { } if (data !== undefined) { - parser.write(data); - parser.end(); + // Give a chance for events to be registered later + if (typeof setImmediate === 'function') { + setImmediate(function () { + parser.write(data); + parser.end(); + }); + } else { + parser.write(data); + parser.end(); + } } return parser; }; +var CsvError = /*#__PURE__*/function (_Error) { + _inherits(CsvError, _Error); + + var _super2 = _createSuper(CsvError); + + function CsvError(code, message) { + var _this2; + + _classCallCheck(this, CsvError); + + if (Array.isArray(message)) message = message.join(' '); + _this2 = _super2.call(this, message); + + if (Error.captureStackTrace !== undefined) { + Error.captureStackTrace(_assertThisInitialized(_this2), CsvError); + } + + _this2.code = code; + + for (var _len = arguments.length, contexts = new Array(_len > 2 ? 
_len - 2 : 0), _key = 2; _key < _len; _key++) { + contexts[_key - 2] = arguments[_key]; + } + + for (var _i2 = 0, _contexts = contexts; _i2 < _contexts.length; _i2++) { + var context = _contexts[_i2]; + + for (var key in context) { + var value = context[key]; + _this2[key] = Buffer.isBuffer(value) ? value.toString() : value == null ? value : JSON.parse(JSON.stringify(value)); + } + } + + return _this2; + } + + return CsvError; +}( /*#__PURE__*/_wrapNativeSuper(Error)); + parse.Parser = Parser; +parse.CsvError = CsvError; module.exports = parse; var underscore = function underscore(str) { - return str.replace(/([A-Z])/g, function (_, match, index) { + return str.replace(/([A-Z])/g, function (_, match) { return '_' + match.toLowerCase(); }); }; @@ -1185,11 +1370,16 @@ var isObject = function isObject(obj) { return _typeof(obj) === 'object' && obj !== null && !Array.isArray(obj); }; +var isRecordEmpty = function isRecordEmpty(record) { + return record.every(function (field) { + return field == null || field.toString && field.toString().trim() === ''; + }); +}; + var normalizeColumnsArray = function normalizeColumnsArray(columns) { - // console.log('columns', columns) var normalizedColumns = []; - for (var i = 0; i < columns.length; i++) { + for (var i = 0, l = columns.length; i < l; i++) { var column = columns[i]; if (column === undefined || column === null || column === false) { @@ -1202,15 +1392,14 @@ var normalizeColumnsArray = function normalizeColumnsArray(columns) { }; } else if (isObject(column)) { if (typeof column.name !== 'string') { - throw new Error("Invalid Option columns: property \"name\" is required at position ".concat(i, " when column is an object literal")); + throw new CsvError('CSV_OPTION_COLUMNS_MISSING_NAME', ['Option columns missing name:', "property \"name\" is required at position ".concat(i), 'when column is an object literal']); } normalizedColumns[i] = column; } else { - throw new Error("Invalid Option columns: expect a string or an 
object, got ".concat(JSON.stringify(column), " at position ").concat(i)); + throw new CsvError('CSV_INVALID_COLUMN_DEFINITION', ['Invalid column definition:', 'expect a string or a literal object,', "got ".concat(JSON.stringify(column), " at position ").concat(i)]); } - } // console.log(normalizedColumns) - + } return normalizedColumns; }; \ No newline at end of file diff --git a/lib/es5/sync.js b/lib/es5/sync.js index c8db999..00106a0 100644 --- a/lib/es5/sync.js +++ b/lib/es5/sync.js @@ -13,6 +13,10 @@ module.exports = function (data) { var parser = new parse.Parser(options); parser.push = function (record) { + if (record === null) { + return; + } + if (options.objname === undefined) records.push(record);else { records[record[0]] = record[1]; } diff --git a/lib/index.d.ts b/lib/index.d.ts index 8be0723..47eccaf 100644 --- a/lib/index.d.ts +++ b/lib/index.d.ts @@ -23,13 +23,13 @@ declare namespace parse { __write(chars: any, end: any, callback: any): any; - readonly options: Options; + readonly options: Options readonly info: Info; } interface CastingContext { - readonly column?: number | string; + readonly column: number | string; readonly empty_lines: number; readonly header: boolean; readonly index: number; @@ -51,11 +51,13 @@ declare namespace parse { * @deprecated Use {@link cast} */ auto_parse?: boolean | CastingFunction; + autoParse?: boolean | CastingFunction; /** * If true, the parser will attempt to convert read data types to dates. It requires the "auto_parse" option. * @deprecated Use {@link cast_date} */ auto_parse_date?: boolean | CastingDateFunction; + autoParseDate?: boolean | CastingDateFunction; /** * If true, detect and exclude the byte order mark (BOM) from the CSV input if present. */ @@ -70,6 +72,7 @@ declare namespace parse { * If a function, receive the value as argument and return a new value. It requires the "auto_parse" option. Be careful, it relies on Date.parse. 
*/ cast_date?: boolean | CastingDateFunction; + castDate?: boolean | CastingDateFunction; /** * List of fields as an array, * a user defined callback accepting the first line and returning the column names or true if autodiscovered in the first CSV line, @@ -77,6 +80,12 @@ declare namespace parse { * affect the result data set in the sense that records will be objects instead of arrays. */ columns?: ColumnOption[] | boolean | ((record: any) => ColumnOption[]); + /** + * Convert values into an array of values when columns are activated and + * when multiple columns of the same name are found. + */ + columns_duplicates_to_array?: boolean; + columnsDuplicatesToArray?: boolean; /** * Treat all the characters after this one as a comment, default to '' (disabled). */ @@ -84,7 +93,7 @@ declare namespace parse { /** * Set the field delimiter. One character only, defaults to comma. */ - delimiter?: string | Buffer; + delimiter?: string | string[] | Buffer; /** * Set the escape character, one character only, defaults to double quotes. */ @@ -97,6 +106,7 @@ declare namespace parse { * Start handling records from the requested line number. */ from_line?: number; + fromLine?: number; /** * Generate two properties `info` and `record` where `info` is a snapshot of the info object at the time the record was created and `record` is the parsed array or object. */ @@ -112,14 +122,20 @@ declare namespace parse { * default to 128000 characters. */ max_record_size?: number; + maxRecordSize?: number; /** * Name of header-record title to name objects by. */ objname?: string; + /** + * Alter and filter records by executing a user defined function. + */ + on_record?: (record: any, context: CastingContext) => any; + onRecord?: (record: any, context: CastingContext) => any; /** * Optional character surrounding a field, one character only, defaults to double quotes. 
*/ - quote?: string | boolean | Buffer; + quote?: string | boolean | Buffer | null; /** * Generate two properties raw and row where raw is the original CSV row content and row is the parsed array or object. */ @@ -132,11 +148,23 @@ declare namespace parse { * Discard inconsistent columns count, default to false. */ relax_column_count?: boolean; + relaxColumnCount?: boolean; + /** + * Discard inconsistent columns count when the record contains less fields than expected, default to false. + */ + relax_column_count_less?: boolean; + relaxColumnCountLess?: boolean; + /** + * Discard inconsistent columns count when the record contains more fields than expected, default to false. + */ + relax_column_count_more?: boolean; + relaxColumnCountMore?: boolean; /** * One or multiple characters used to delimit record rows; defaults to auto discovery if not provided. * Supported auto discovery method are Linux ("\n"), Apple ("\r") and Windows ("\r\n") row delimiters. */ record_delimiter?: string | string[] | Buffer | Buffer[]; + recordDelimiter?: string | string[] | Buffer | Buffer[]; /** * If true, ignore whitespace immediately preceding the delimiter (i.e. right-trim all fields), defaults to false. * Does not remove whitespace in a quoted field. @@ -147,14 +175,17 @@ declare namespace parse { * Defaults to false */ skip_empty_lines?: boolean; + skipEmptyLines?: boolean; /** * Skip a line with error found inside and directly go process the next line. */ skip_lines_with_error?: boolean; + skipLinesWithError?: boolean; /** * Don't generate records for lines containing empty column values (column matching /\s*\/), defaults to false. */ skip_lines_with_empty_values?: boolean; + skipLinesWithEmptyValues?: boolean; /** * Stop handling records after the requested number of records. */ @@ -163,6 +194,7 @@ declare namespace parse { * Stop handling records after the requested line number. 
*/ to_line?: number; + toLine?: number; /** * If true, ignore whitespace immediately around the delimiter, defaults to false. * Does not remove whitespace in a quoted field. @@ -192,4 +224,32 @@ declare namespace parse { */ readonly invalid_field_length: number; } + + class CsvError extends Error { + readonly code: CsvErrorCode; + [key: string]: any; + + constructor(code: CsvErrorCode, message: string | string[], ...contexts: any[]); + } + + type CsvErrorCode = + 'CSV_INVALID_OPTION_BOM' + | 'CSV_INVALID_OPTION_CAST' + | 'CSV_INVALID_OPTION_CAST_DATE' + | 'CSV_INVALID_OPTION_COLUMNS' + | 'CSV_INVALID_OPTION_COLUMNS_DUPLICATES_TO_ARRAY' + | 'CSV_INVALID_OPTION_COMMENT' + | 'CSV_INVALID_OPTION_DELIMITER' + | 'CSV_INVALID_OPTION_ON_RECORD' + | 'CSV_INVALID_CLOSING_QUOTE' + | 'INVALID_OPENING_QUOTE' + | 'CSV_INVALID_COLUMN_MAPPING' + | 'CSV_INVALID_ARGUMENT' + | 'CSV_INVALID_COLUMN_DEFINITION' + | 'CSV_MAX_RECORD_SIZE' + | 'CSV_NON_TRIMABLE_CHAR_AFTER_CLOSING_QUOTE' + | 'CSV_QUOTE_NOT_CLOSED' + | 'CSV_INCONSISTENT_RECORD_LENGTH' + | 'CSV_RECORD_DONT_MATCH_COLUMNS_LENGTH' + | 'CSV_OPTION_COLUMNS_MISSING_NAME' } diff --git a/lib/index.js b/lib/index.js index dd296c8..fb1a429 100644 --- a/lib/index.js +++ b/lib/index.js @@ -2,17 +2,18 @@ /* CSV Parse -Please look at the [project documentation](https://csv.js.org/parse/) for additional -information. +Please look at the [project documentation](https://csv.js.org/parse/) for +additional information. 
*/ const { Transform } = require('stream') const ResizeableBuffer = require('./ResizeableBuffer') -const cr = 13 +const tab = 9 const nl = 10 +const np = 12 +const cr = 13 const space = 32 -const tab = 9 const bom_utf8 = Buffer.from([239, 187, 191]) class Parser extends Transform { @@ -27,7 +28,10 @@ class Parser extends Transform { if(options.bom === undefined || options.bom === null || options.bom === false){ options.bom = false }else if(options.bom !== true){ - throw new Error(`Invalid Option: bom must be true, got ${JSON.stringify(options.bom)}`) + throw new CsvError('CSV_INVALID_OPTION_BOM', [ + 'Invalid option bom:', 'bom must be true,', + `got ${JSON.stringify(options.bom)}` + ]) } // Normalize option `cast` let fnCastField = null @@ -37,7 +41,10 @@ class Parser extends Transform { fnCastField = options.cast options.cast = true }else if(options.cast !== true){ - throw new Error('Invalid Option: cast must be true or a function') + throw new CsvError('CSV_INVALID_OPTION_CAST', [ + 'Invalid option cast:', 'cast must be true or a function,', + `got ${JSON.stringify(options.cast)}` + ]) } // Normalize option `cast_date` if(options.cast_date === undefined || options.cast_date === null || options.cast_date === false || options.cast_date === ''){ @@ -48,7 +55,10 @@ class Parser extends Transform { return !isNaN(date) ? 
new Date(date) : value } }else if(typeof options.cast_date !== 'function'){ - throw new Error('Invalid Option: cast_date must be true or a function') + throw new CsvError('CSV_INVALID_OPTION_CAST_DATE', [ + 'Invalid option cast_date:', 'cast_date must be true or a function,', + `got ${JSON.stringify(options.cast_date)}` + ]) } // Normalize option `columns` let fnFirstLineToHeaders = null @@ -63,7 +73,21 @@ class Parser extends Transform { }else if(options.columns === undefined || options.columns === null || options.columns === false){ options.columns = false }else{ - throw new Error(`Invalid Option columns: expect an object or true, got ${JSON.stringify(options.columns)}`) + throw new CsvError('CSV_INVALID_OPTION_COLUMNS', [ + 'Invalid option columns:', + 'expect an object, a function or true,', + `got ${JSON.stringify(options.columns)}` + ]) + } + // Normalize option `columns_duplicates_to_array` + if(options.columns_duplicates_to_array === undefined || options.columns_duplicates_to_array === null || options.columns_duplicates_to_array === false){ + options.columns_duplicates_to_array = false + }else if(options.columns_duplicates_to_array !== true){ + throw new CsvError('CSV_INVALID_OPTION_COLUMNS_DUPLICATES_TO_ARRAY', [ + 'Invalid option columns_duplicates_to_array:', + 'expect an boolean,', + `got ${JSON.stringify(options.columns_duplicates_to_array)}` + ]) } // Normalize option `comment` if(options.comment === undefined || options.comment === null || options.comment === false || options.comment === ''){ @@ -73,37 +97,55 @@ class Parser extends Transform { options.comment = Buffer.from(options.comment) } if(!Buffer.isBuffer(options.comment)){ - throw new Error(`Invalid Option: comment must be a buffer or a string, got ${JSON.stringify(options.comment)}`) + throw new CsvError('CSV_INVALID_OPTION_COMMENT', [ + 'Invalid option comment:', + 'comment must be a buffer or a string,', + `got ${JSON.stringify(options.comment)}` + ]) } } // Normalize option `delimiter` - 
if(options.delimiter === undefined || options.delimiter === null || options.delimiter === false){ - options.delimiter = Buffer.from(',') - }else if(Buffer.isBuffer(options.delimiter)){ - if(options.delimiter.length === 0){ - throw new Error(`Invalid Option: delimiter must be a non empty buffer`) + const delimiter_json = JSON.stringify(options.delimiter) + if(!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter] + if(options.delimiter.length === 0){ + throw new CsvError('CSV_INVALID_OPTION_DELIMITER', [ + 'Invalid option delimiter:', + 'delimiter must be a non empty string or buffer or array of string|buffer,', + `got ${delimiter_json}` + ]) + } + options.delimiter = options.delimiter.map(function(delimiter){ + if(delimiter === undefined || delimiter === null || delimiter === false){ + return Buffer.from(',') } - // Great, nothing to do - }else if(typeof options.delimiter === 'string'){ - if(options.delimiter.length === 0){ - throw new Error(`Invalid Option: delimiter must be a non empty string`) + if(typeof delimiter === 'string'){ + delimiter = Buffer.from(delimiter) } - options.delimiter = Buffer.from(options.delimiter) - }else{ - throw new Error(`Invalid Option: delimiter must be a string or a buffer, got ${options.delimiter}`) - } + if( !Buffer.isBuffer(delimiter) || delimiter.length === 0){ + throw new CsvError('CSV_INVALID_OPTION_DELIMITER', [ + 'Invalid option delimiter:', + 'delimiter must be a non empty string or buffer or array of string|buffer,', + `got ${delimiter_json}` + ]) + } + return delimiter + }) // Normalize option `escape` - if(options.escape === undefined || options.escape === null){ + if(options.escape === undefined || options.escape === true){ options.escape = Buffer.from('"') }else if(typeof options.escape === 'string'){ options.escape = Buffer.from(options.escape) + }else if (options.escape === null || options.escape === false){ + options.escape = null } - if(!Buffer.isBuffer(options.escape)){ - throw new 
Error(`Invalid Option: escape must be a buffer or a string, got ${JSON.stringify(options.escape)}`) - }else if(options.escape.length !== 1){ - throw new Error(`Invalid Option Length: escape must be one character, got ${options.escape.length}`) - }else{ - options.escape = options.escape[0] + if(options.escape !== null){ + if(!Buffer.isBuffer(options.escape)){ + throw new Error(`Invalid Option: escape must be a buffer, a string or a boolean, got ${JSON.stringify(options.escape)}`) + }else if(options.escape.length !== 1){ + throw new Error(`Invalid Option Length: escape must be one character, got ${options.escape.length}`) + }else{ + options.escape = options.escape[0] + } } // Normalize option `from` if(options.from === undefined || options.from === null){ @@ -167,6 +209,16 @@ class Parser extends Transform { }else{ throw new Error(`Invalid Option: objname must be a string or a buffer, got ${options.objname}`) } + // Normalize option `on_record` + if(options.on_record === undefined || options.on_record === null){ + options.on_record = undefined + }else if(typeof options.on_record !== 'function'){ + throw new CsvError('CSV_INVALID_OPTION_ON_RECORD', [ + 'Invalid option `on_record`:', + 'expect a function,', + `got ${JSON.stringify(options.on_record)}` + ]) + } // Normalize option `quote` if(options.quote === null || options.quote === false || options.quote === ''){ options.quote = null @@ -218,6 +270,20 @@ class Parser extends Transform { }else{ throw new Error(`Invalid Option: relax_column_count must be a boolean, got ${JSON.stringify(options.relax_column_count)}`) } + if(typeof options.relax_column_count_less === 'boolean'){ + // Great, nothing to do + }else if(options.relax_column_count_less === undefined || options.relax_column_count_less === null){ + options.relax_column_count_less = false + }else{ + throw new Error(`Invalid Option: relax_column_count_less must be a boolean, got ${JSON.stringify(options.relax_column_count_less)}`) + } + if(typeof 
options.relax_column_count_more === 'boolean'){ + // Great, nothing to do + }else if(options.relax_column_count_more === undefined || options.relax_column_count_more === null){ + options.relax_column_count_more = false + }else{ + throw new Error(`Invalid Option: relax_column_count_more must be a boolean, got ${JSON.stringify(options.relax_column_count_more)}`) + } // Normalize option `skip_empty_lines` if(typeof options.skip_empty_lines === 'boolean'){ // Great, nothing to do @@ -354,9 +420,9 @@ class Parser extends Transform { } // Central parser implementation __parse(nextBuf, end){ - const {bom, comment, escape, from, from_line, info, ltrim, max_record_size, quote, raw, relax, rtrim, skip_empty_lines, to, to_line} = this.options + const {bom, comment, escape, from_line, info, ltrim, max_record_size, quote, raw, relax, rtrim, skip_empty_lines, to, to_line} = this.options let {record_delimiter} = this.options - const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote, trimChars} = this.state + const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote} = this.state let buf if(previousBuf === undefined){ if(nextBuf === undefined){ @@ -431,8 +497,8 @@ class Parser extends Transform { this.state.escaping = false }else{ // Escape is only active inside quoted fields - if(this.state.quoting === true && chr === escape && pos + 1 < bufLen){ - // We are quoting, the char is an escape chr and there is a chr to escape + // We are quoting, the char is an escape chr and there is a chr to escape + if(escape !== null && this.state.quoting === true && chr === escape && pos + 1 < bufLen){ if(escapeIsQuote){ if(buf[pos+1] === quote){ this.state.escaping = true @@ -456,14 +522,22 @@ class Parser extends Transform { // Escape a quote // Treat next char as a regular character // TODO: need to compare bytes instead of single char - if(chr === escape && nextChr === quote){ + if(escape !== null && chr === escape && nextChr === quote){ pos++ }else if(!nextChr || isNextChrDelimiter || 
isNextChrRowDelimiter || isNextChrComment || isNextChrTrimable){ this.state.quoting = false this.state.wasQuoting = true continue }else if(relax === false){ - const err = this.__error(`Invalid Closing Quote: got "${String.fromCharCode(nextChr)}" at line ${this.info.lines} instead of delimiter, row delimiter, trimable character (if activated) or comment`) + const err = this.__error( + new CsvError('CSV_INVALID_CLOSING_QUOTE', [ + 'Invalid Closing Quote:', + `got "${String.fromCharCode(nextChr)}"`, + `at line ${this.info.lines}`, + 'instead of delimiter, row delimiter, trimable character', + '(if activated) or comment', + ], this.__context()) + ) if(err !== undefined) return err }else{ this.state.quoting = false @@ -475,7 +549,14 @@ class Parser extends Transform { if(this.state.field.length !== 0){ // In relax mode, treat opening quote preceded by chrs as regular if( relax === false ){ - const err = this.__error(`Invalid opening quote at line ${this.info.lines}`) + const err = this.__error( + new CsvError('INVALID_OPENING_QUOTE', [ + 'Invalid Opening Quote:', + `a quote is found inside a field at line ${this.info.lines}`, + ], this.__context(), { + field: this.state.field, + }) + ) if(err !== undefined) return err } }else{ @@ -541,7 +622,14 @@ class Parser extends Transform { } if(this.state.commenting === false){ if(max_record_size !== 0 && this.state.record_length + this.state.field.length > max_record_size){ - const err = this.__error(`Max Record Size: record exceed the maximum number of tolerated bytes of ${max_record_size} on line ${this.info.lines}`) + const err = this.__error( + new CsvError('CSV_MAX_RECORD_SIZE', [ + 'Max Record Size:', + 'record exceed the maximum number of tolerated bytes', + `of ${max_record_size}`, + `at line ${this.info.lines}`, + ], this.__context()) + ) if(err !== undefined) return err } } @@ -552,13 +640,25 @@ class Parser extends Transform { if( lappend === true && rappend === true ){ this.state.field.append(chr) }else if(rtrim === 
true && !this.__isCharTrimable(chr)){ - const err = this.__error(`Invalid Closing Quote: found non trimable byte after quote at line ${this.info.lines}`) + const err = this.__error( + new CsvError('CSV_NON_TRIMABLE_CHAR_AFTER_CLOSING_QUOTE', [ + 'Invalid Closing Quote:', + 'found non trimable byte after quote', + `at line ${this.info.lines}`, + ], this.__context()) + ) if(err !== undefined) return err } } if(end === true){ + // Ensure we are not ending in a quoting state if(this.state.quoting === true){ - const err = this.__error(`Invalid Closing Quote: quote is not closed at line ${this.info.lines}`) + const err = this.__error( + new CsvError('CSV_QUOTE_NOT_CLOSED', [ + 'Quote Not Closed:', + `the parsing is finished with an opening quote at line ${this.info.lines}`, + ], this.__context()) + ) if(err !== undefined) return err }else{ // Skip last line if it has no characters @@ -583,38 +683,60 @@ class Parser extends Transform { } // Helper to test if a character is a space or a line delimiter __isCharTrimable(chr){ - return chr === space || chr === tab || chr === cr || chr === nl + return chr === space || chr === tab || chr === cr || chr === nl || chr === np } __onRow(){ - const {columns, info, from, relax_column_count, raw, skip_lines_with_empty_values} = this.options + const {columns, columns_duplicates_to_array, info, from, relax_column_count, relax_column_count_less, relax_column_count_more, raw, skip_lines_with_empty_values} = this.options const {enabled, record} = this.state + if(enabled === false){ + return this.__resetRow() + } // Convert the first line into column names + const recordLength = record.length if(columns === true){ + if(isRecordEmpty(record)){ + this.__resetRow() + return + } return this.__firstLineToColumns(record) } - const recordLength = record.length if(columns === false && this.info.records === 0){ this.state.expectedRecordLength = recordLength - }else if(enabled === true){ - if(recordLength !== this.state.expectedRecordLength){ - 
if(relax_column_count === true){ - this.info.invalid_field_length++ + } + if(recordLength !== this.state.expectedRecordLength){ + if(relax_column_count === true || + (relax_column_count_less === true && recordLength < this.state.expectedRecordLength) || + (relax_column_count_more === true && recordLength > this.state.expectedRecordLength) ){ + this.info.invalid_field_length++ + }else{ + if(columns === false){ + const err = this.__error( + new CsvError('CSV_INCONSISTENT_RECORD_LENGTH', [ + 'Invalid Record Length:', + `expect ${this.state.expectedRecordLength},`, + `got ${recordLength} on line ${this.info.lines}`, + ], this.__context(), { + record: record, + }) + ) + if(err !== undefined) return err }else{ - if(columns === false){ - const err = this.__error(`Invalid Record Length: expect ${this.state.expectedRecordLength}, got ${recordLength} on line ${this.info.lines}`) - if(err !== undefined) return err - }else{ - const err = this.__error(`Invalid Record Length: header length is ${columns.length}, got ${recordLength} on line ${this.info.lines}`) - if(err !== undefined) return err - } + const err = this.__error( + // CSV_INVALID_RECORD_LENGTH_DONT_MATCH_COLUMNS + new CsvError('CSV_RECORD_DONT_MATCH_COLUMNS_LENGTH', [ + 'Invalid Record Length:', + `columns length is ${columns.length},`, // rename columns + `got ${recordLength} on line ${this.info.lines}`, + ], this.__context(), { + record: record, + }) + ) + if(err !== undefined) return err } } } - if(enabled === false){ - return this.__resetRow() - } if(skip_lines_with_empty_values === true){ - if(record.map( (field) => field.trim() ).join('') === ''){ + if(isRecordEmpty(record)){ this.__resetRow() return } @@ -629,41 +751,69 @@ class Parser extends Transform { if(columns !== false){ const obj = {} // Transform record array to an object - for(let i in record){ + for(let i = 0, l = record.length; i < l; i++){ if(columns[i] === undefined || columns[i].disabled) continue - obj[columns[i].name] = record[i] + // 
obj[columns[i].name] = record[i] + // Turn duplicate columns into an array + if (columns_duplicates_to_array === true && obj[columns[i].name]) { + if (Array.isArray(obj[columns[i].name])) { + obj[columns[i].name] = obj[columns[i].name].concat(record[i]) + } else { + obj[columns[i].name] = [obj[columns[i].name], record[i]] + } + } else { + obj[columns[i].name] = record[i] + } } const {objname} = this.options if(objname === undefined){ if(raw === true || info === true){ - this.push(Object.assign( + const err = this.__push(Object.assign( {record: obj}, (raw === true ? {raw: this.state.rawBuffer.toString()}: {}), (info === true ? {info: this.state.info}: {}) )) + if(err){ + return err + } }else{ - this.push(obj) + const err = this.__push(obj) + if(err){ + return err + } } }else{ if(raw === true || info === true){ - this.push(Object.assign( + const err = this.__push(Object.assign( {record: [obj[objname], obj]}, raw === true ? {raw: this.state.rawBuffer.toString()}: {}, info === true ? {info: this.state.info}: {} )) + if(err){ + return err + } }else{ - this.push([obj[objname], obj]) + const err = this.__push([obj[objname], obj]) + if(err){ + return err + } } } }else{ if(raw === true || info === true){ - this.push(Object.assign( + const err = this.__push(Object.assign( {record: record}, raw === true ? {raw: this.state.rawBuffer.toString()}: {}, info === true ? {info: this.state.info}: {} )) + if(err){ + return err + } }else{ - this.push(record) + const err = this.__push(record) + if(err){ + return err + } } } } @@ -672,10 +822,17 @@ class Parser extends Transform { __firstLineToColumns(record){ const {firstLineToHeaders} = this.state try{ - // record = record.filter(function(field){ return field !== undefined}) const headers = firstLineToHeaders === undefined ? 
record : firstLineToHeaders.call(null, record) if(!Array.isArray(headers)){ - return this.__error(`Invalid Header Mapping: expect an array, got ${JSON.stringify(headers)}`) + return this.__error( + new CsvError('CSV_INVALID_COLUMN_MAPPING', [ + 'Invalid Column Mapping:', + 'expect an array from column function,', + `got ${JSON.stringify(headers)}` + ], this.__context(), { + headers: headers, + }) + ) } const normalizedHeaders = normalizeColumnsArray(headers) this.state.expectedRecordLength = normalizedHeaders.length @@ -687,7 +844,6 @@ class Parser extends Transform { } } __resetRow(){ - const {info} = this.options if(this.options.raw === true){ this.state.rawBuffer.reset() } @@ -697,8 +853,8 @@ class Parser extends Transform { __onField(){ const {cast, rtrim, max_record_size} = this.options const {enabled, wasQuoting} = this.state - // Deal with from_to options - if(this.options.columns !== true && enabled === false){ + // Short circuit for the from_line options + if(enabled === false){ /* this.options.columns !== true && */ return this.__resetField() } let field = this.state.field.toString() @@ -721,25 +877,30 @@ class Parser extends Transform { this.state.field.reset() this.state.wasQuoting = false } + __push(record){ + const {on_record} = this.options + if(on_record !== undefined){ + const context = this.__context() + try{ + record = on_record.call(null, record, context) + }catch(err){ + return err + } + if(record === undefined || record === null){ return } + } + this.push(record) + } // Return a tuple with the error and the casted value __cast(field){ - const isColumns = Array.isArray(this.options.columns) - // Dont loose time calling cast if the field wont be part of the final record - if( isColumns === true && this.options.columns.length <= this.state.record.length ){ + const {columns, relax_column_count} = this.options + const isColumns = Array.isArray(columns) + // Dont loose time calling cast + // because the final record is an object + // and this field 
can't be associated to a key present in columns + if( isColumns === true && relax_column_count && this.options.columns.length <= this.state.record.length ){ return [undefined, undefined] } - const context = { - column: isColumns === true ? - this.options.columns[this.state.record.length].name : - this.state.record.length, - empty_lines: this.info.empty_lines, - header: this.options.columns === true, - index: this.state.record.length, - invalid_field_length: this.info.invalid_field_length, - quoting: this.state.wasQuoting, - lines: this.info.lines, - records: this.info.records - } + const context = this.__context() if(this.state.castField !== null){ try{ return [undefined, this.state.castField.call(null, field, context)] @@ -747,18 +908,19 @@ class Parser extends Transform { return [err] } } - if(this.__isInt(field) === true){ - return [undefined, parseInt(field)] - }else if(this.__isFloat(field)){ + if(this.__isFloat(field)){ return [undefined, parseFloat(field)] }else if(this.options.cast_date !== false){ return [undefined, this.options.cast_date.call(null, field, context)] } return [undefined, field] } - __isInt(value){ - return /^(\-|\+)?([1-9]+[0-9]*)$/.test(value) - } + // Keep it in case we implement the `cast_int` option + // __isInt(value){ + // // return Number.isInteger(parseInt(value)) + // // return !isNaN( parseInt( obj ) ); + // return /^(\-|\+)?[1-9][0-9]*$/.test(value) + // } __isFloat(value){ return (value - parseFloat( value ) + 1) >= 0 // Borrowed from jquery } @@ -774,7 +936,7 @@ class Parser extends Transform { if(end){ return false } - const {comment, delimiter, escape} = this.options + const {comment, delimiter} = this.options const {quoting, recordDelimiterMaxLength} = this.state const numOfCharLeft = bufLen - i - 1 const requiredLength = Math.max( @@ -795,12 +957,16 @@ class Parser extends Transform { } __isDelimiter(chr, buf, pos){ const {delimiter} = this.options - const delLength = delimiter.length - if(delimiter[0] !== chr) return 0 - 
for(let i = 1; i < delLength; i++){ - if(delimiter[i] !== buf[pos+i]) return 0 + loop1: for(let i = 0; i < delimiter.length; i++){ + const del = delimiter[i] + if(del[0] === chr){ + for(let j = 1; j < del.length; j++){ + if(del[j] !== buf[pos+j]) continue loop1 + } + return del.length + } } - return delimiter.length + return 0 } __isRecordDelimiter(chr, buf, pos){ const {record_delimiter} = this.options @@ -841,7 +1007,7 @@ class Parser extends Transform { } __error(msg){ const {skip_lines_with_error} = this.options - const err = new Error(msg) + const err = typeof msg === 'string' ? new Error(msg) : msg if(skip_lines_with_error){ this.state.recordHasError = true this.emit('skip', err) @@ -850,6 +1016,25 @@ class Parser extends Transform { return err } } + __context(){ + const {columns} = this.options + const isColumns = Array.isArray(columns) + return { + column: isColumns === true ? + ( columns.length > this.state.record.length ? + columns[this.state.record.length].name : + null + ) : + this.state.record.length, + empty_lines: this.info.empty_lines, + header: columns === true, + index: this.state.record.length, + invalid_field_length: this.info.invalid_field_length, + quoting: this.state.wasQuoting, + lines: this.info.lines, + records: this.info.records + } + } } const parse = function(){ @@ -864,7 +1049,10 @@ const parse = function(){ }else if(callback === undefined && type === 'function'){ callback = argument }else{ - throw new Error(`Invalid argument: got ${JSON.stringify(argument)} at index ${i}`) + throw new CsvError('CSV_INVALID_ARGUMENT', [ + 'Invalid argument:', + `got ${JSON.stringify(argument)} at index ${i}` + ]) } } const parser = new Parser(options) @@ -872,7 +1060,7 @@ const parse = function(){ const records = options === undefined || options.objname === undefined ? 
[] : {} parser.on('readable', function(){ let record - while(record = this.read()){ + while((record = this.read()) !== null){ if(options === undefined || options.objname === undefined){ records.push(record) }else{ @@ -888,18 +1076,45 @@ const parse = function(){ }) } if(data !== undefined){ - parser.write(data) - parser.end() + // Give a chance for events to be registered later + if(typeof setImmediate === 'function'){ + setImmediate(function(){ + parser.write(data) + parser.end() + }) + }else{ + parser.write(data) + parser.end() + } } return parser } +class CsvError extends Error { + constructor(code, message, ...contexts) { + if(Array.isArray(message)) message = message.join(' ') + super(message) + if(Error.captureStackTrace !== undefined){ + Error.captureStackTrace(this, CsvError) + } + this.code = code + for(const context of contexts){ + for(const key in context){ + const value = context[key] + this[key] = Buffer.isBuffer(value) ? value.toString() : value == null ? value : JSON.parse(JSON.stringify(value)) + } + } + } +} + parse.Parser = Parser +parse.CsvError = CsvError + module.exports = parse const underscore = function(str){ - return str.replace(/([A-Z])/g, function(_, match, index){ + return str.replace(/([A-Z])/g, function(_, match){ return '_' + match.toLowerCase() }) } @@ -908,11 +1123,13 @@ const isObject = function(obj){ return (typeof obj === 'object' && obj !== null && !Array.isArray(obj)) } +const isRecordEmpty = function(record){ + return record.every( (field) => field == null || field.toString && field.toString().trim() === '' ) +} + const normalizeColumnsArray = function(columns){ - // console.log('columns', columns) const normalizedColumns = []; - - for(let i=0; i< columns.length; i++){ + for(let i = 0, l = columns.length; i < l; i++){ const column = columns[i] if(column === undefined || column === null || column === false){ normalizedColumns[i] = { disabled: true } @@ -920,13 +1137,20 @@ const normalizeColumnsArray = function(columns){ 
normalizedColumns[i] = { name: column } }else if(isObject(column)){ if(typeof column.name !== 'string'){ - throw new Error(`Invalid Option columns: property "name" is required at position ${i} when column is an object literal`) + throw new CsvError('CSV_OPTION_COLUMNS_MISSING_NAME', [ + 'Option columns missing name:', + `property "name" is required at position ${i}`, + 'when column is an object literal' + ]) } normalizedColumns[i] = column }else{ - throw new Error(`Invalid Option columns: expect a string or an object, got ${JSON.stringify(column)} at position ${i}`) + throw new CsvError('CSV_INVALID_COLUMN_DEFINITION', [ + 'Invalid column definition:', + 'expect a string or a literal object,', + `got ${JSON.stringify(column)} at position ${i}` + ]) } } - // console.log(normalizedColumns) return normalizedColumns; } diff --git a/lib/sync.js b/lib/sync.js index 7704a8b..3f592de 100644 --- a/lib/sync.js +++ b/lib/sync.js @@ -8,6 +8,9 @@ module.exports = function(data, options={}){ const records = options && options.objname ? 
{} : [] const parser = new parse.Parser(options) parser.push = function(record){ + if(record === null){ + return + } if(options.objname === undefined) records.push(record) else{ diff --git a/package.json b/package.json index 7a6aa31..940b6f3 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { - "version": "4.4.5", + "version": "4.12.0", "name": "csv-parse", "description": "CSV parsing implementing the Node.js `stream.Transform` API", "keywords": [ @@ -23,8 +23,9 @@ "level": "ignore" } }, + "author": "David Worms (https://www.adaltas.com)", "contributors": [ - "David Worms (http://www.adaltas.com)", + "David Worms (https://www.adaltas.com)", "Will White (https://github.com/willwhite)", "Justin Latimer (https://github.com/justinlatimer)", "jonseymour (https://github.com/jonseymour)", @@ -57,7 +58,9 @@ "csv-generate": "^3.2.3", "csv-spectrum": "^1.0.0", "each": "^1.2.1", + "eslint": "^6.5.1", "mocha": "^6.2.0", + "pad": "^3.2.0", "should": "^13.2.3", "stream-transform": "^2.0.1", "ts-node": "^8.3.0", @@ -76,10 +79,8 @@ "minor": "npm version minor -m 'Bump to version %s'", "major": "npm version major -m 'Bump to version %s'", "pretest": "cd lib && babel *.js -d es5 && cd ..", - "test": "mocha test/**/*.{coffee,ts}" + "lint": "eslint lib/*.js", + "test": "npm run lint && TS_NODE_COMPILER_OPTIONS='{\"strictNullChecks\":true}' mocha test/**/*.{coffee,ts}" }, - "types": [ - "./lib/index.d.ts", - "./lib/sync.d.ts" - ] + "types": "./lib/index.d.ts" } diff --git a/samples/async.iterator.js b/samples/async.iterator.js new file mode 100644 index 0000000..258fa6d --- /dev/null +++ b/samples/async.iterator.js @@ -0,0 +1,29 @@ + +const assert = require('assert'); +const generate = require('csv-generate'); +const parse = require('..'); + +(async () => { + // Initialise the parser by generating random records + const parser = generate({ + high_water_mark: 64 * 64, + length: 1000 + }).pipe( + parse() + ) + // Intialise count + let count = 0; + // Report start + 
process.stdout.write('start\n')
+  // Iterate through each record
+  for await (const record of parser) {
+    // Report current line
+    process.stdout.write(`${count++} ${record.join(',')}\n`)
+    // Fake asynchronous operation
+    await new Promise((resolve) => setTimeout(resolve, 100))
+  }
+  // Report end
+  process.stdout.write('...done\n')
+  // Validation
+  assert.strictEqual(count, 1000)
+})()
diff --git a/samples/option.bom.hidden.js b/samples/option.bom.hidden.js
new file mode 100644
index 0000000..f0fcb2d
--- /dev/null
+++ b/samples/option.bom.hidden.js
@@ -0,0 +1,13 @@
+
+const parse = require('../lib/sync')
+const assert = require('assert')
+
+const data = "\ufeffkey\nvalue"
+const records = parse(data, {
+  bom: false,
+  columns: true
+})
+// It seems that the output is perfectly fine
+assert.equal(JSON.stringify(records[0]), '{"key":"value"}')
+// However, the first property includes the BOM bytes
+assert.equal(Object.keys(records[0])[0], '\ufeffkey')
diff --git a/samples/option.columns_duplicates_to_array.js b/samples/option.columns_duplicates_to_array.js
new file mode 100644
index 0000000..5f4e2df
--- /dev/null
+++ b/samples/option.columns_duplicates_to_array.js
@@ -0,0 +1,22 @@
+
+const parse = require('..')
+const assert = require('assert')
+
+parse(`
+friend,username,friend
+athos,porthos,aramis
+porthos,d_artagnan,athos
+`.trim(), {
+  columns: true,
+  columns_duplicates_to_array: true
+}, function(err, records){
+  assert.deepEqual(
+    records, [{
+      username: 'porthos',
+      friend: ['athos', 'aramis']
+    }, {
+      username: 'd_artagnan',
+      friend: ['porthos', 'athos']
+    }]
+  )
+})
diff --git a/samples/option.comment.js b/samples/option.comment.js
new file mode 100644
index 0000000..adb9b6c
--- /dev/null
+++ b/samples/option.comment.js
@@ -0,0 +1,16 @@
+
+const parse = require('../lib/sync')
+const assert = require('assert')
+
+const data = `
+# At the beginning of a record
+"hello"
+"world"# At the end of a record
+`.trim()
+const records = parse(data, {
+  comment: 
"#" +}) +assert.deepEqual(records, [ + [ "hello" ], + [ "world" ] +]) diff --git a/samples/option.delimiter.js b/samples/option.delimiter.js new file mode 100644 index 0000000..e27b7ce --- /dev/null +++ b/samples/option.delimiter.js @@ -0,0 +1,12 @@ + +const parse = require('../lib/sync') +const assert = require('assert') + +const data = 'a key => a value' +const records = parse(data, { + delimiter: "=>", + trim: true +}) +assert.deepEqual(records, [ + [ "a key", "a value" ] +]) diff --git a/samples/option.escape.custom.js b/samples/option.escape.custom.js new file mode 100644 index 0000000..eb008db --- /dev/null +++ b/samples/option.escape.custom.js @@ -0,0 +1,8 @@ +const parse = require('../lib/sync') +const assert = require('assert') + +const data = `a,"b\\"c",d` +const records = parse(data, { escape: '\\' }) +assert.deepEqual(records, [ + [ 'a', 'b"c', 'd' ] +]) diff --git a/samples/option.escape.default.js b/samples/option.escape.default.js new file mode 100644 index 0000000..b91ee41 --- /dev/null +++ b/samples/option.escape.default.js @@ -0,0 +1,8 @@ +const parse = require('../lib/sync') +const assert = require('assert') + +const data = `a,"b""c",d` +const records = parse(data) +assert.deepEqual(records, [ + [ 'a', 'b"c', 'd' ] +]) diff --git a/samples/option.from_line.js b/samples/option.from_line.js new file mode 100644 index 0000000..1d79132 --- /dev/null +++ b/samples/option.from_line.js @@ -0,0 +1,19 @@ + +const parse = require('../lib') +const assert = require('assert') + +parse(` +x,x +a,b +1,2 +`.trim(), { + columns: true, + from_line: 2 +}, function(err, records){ + assert.deepEqual( + records, [{ + a: '1', + b: '2' + }] + ) +}) diff --git a/samples/option.on_record.alter.js b/samples/option.on_record.alter.js new file mode 100644 index 0000000..1c24e57 --- /dev/null +++ b/samples/option.on_record.alter.js @@ -0,0 +1,18 @@ + +const parse = require('../lib') +const assert = require('assert') + +parse(` +a.1,a.2,a.3 +b.1,b.2,b.3 +`.trim(), { + 
on_record: (record, {lines}) => + [lines, record[2], record[0]] +}, function(err, records){ + assert.deepEqual( + records, [ + [1, 'a.3', 'a.1'], + [2, 'b.3', 'b.1'] + ] + ) +}) diff --git a/samples/option.on_record.filter.js b/samples/option.on_record.filter.js new file mode 100644 index 0000000..bf5df0f --- /dev/null +++ b/samples/option.on_record.filter.js @@ -0,0 +1,19 @@ + +const parse = require('..') +const assert = require('assert') + +parse(` +line 1 +line 2 +line 3 +`.trim(), { + on_record: (record, {lines}) => + lines === 2 ? null : record +}, function(err, records){ + assert.deepEqual( + records, [ + [`line 1`], + [`line 3`] + ] + ) +}) diff --git a/samples/option.to_line.js b/samples/option.to_line.js new file mode 100644 index 0000000..d551fa7 --- /dev/null +++ b/samples/option.to_line.js @@ -0,0 +1,16 @@ +const parse = require('../lib/sync') +const assert = require('assert') + +const records = parse(` +a,1 +b,1 +x,x +`.trim(), { + to_line: 2 +}) +assert.deepEqual( + records, [ + [ 'a', '1' ], + [ 'b', '1' ] + ] +) diff --git a/samples/option.trim.js b/samples/option.trim.js new file mode 100644 index 0000000..8ea0618 --- /dev/null +++ b/samples/option.trim.js @@ -0,0 +1,18 @@ + +const parse = require('../lib/sync') +const assert = require('assert') + +const records = parse(` +a ,1 +b, 2 + c,3 +`.trim(), { + trim: true +}) +assert.deepEqual( + records, [ + [ 'a', '1' ], + [ 'b', '2' ], + [ 'c', '3' ] + ] +) diff --git a/samples/api.pipe.js b/samples/recipe.pipe.js similarity index 100% rename from samples/api.pipe.js rename to samples/recipe.pipe.js diff --git a/samples/recipies.file.js b/samples/recipies.file.js new file mode 100644 index 0000000..8e48c68 --- /dev/null +++ b/samples/recipies.file.js @@ -0,0 +1,22 @@ + +const os = require('os'); +const fs = require('fs').promises; +const parse = require('../lib/sync'); + +(async function(){ + // Prepare the dataset + await fs.writeFile(`${os.tmpdir()}/input.csv`, [ + '\ufeff', // BOM + 'a,1\n', // 
First record
+    'b,2\n' // Second record
+  ].join(''), {encoding: 'utf8'})
+  // Read the content
+  const content = await fs.readFile(`${os.tmpdir()}/input.csv`)
+  // Parse the CSV content
+  const records = parse(content)
+  // Print records to the console
+  // records.map( record => console.log(record) )
+  // Write a file with one JSON per line for each record
+  const json = records.map( JSON.stringify ).join('\n')
+  await fs.writeFile(`${os.tmpdir()}/output.csv`, json)
+})()
diff --git a/test/ResizableBuffer.coffee b/test/ResizableBuffer.coffee
index b0e96f7..e3b2277 100644
--- a/test/ResizableBuffer.coffee
+++ b/test/ResizableBuffer.coffee
@@ -63,3 +63,10 @@ describe 'ResizeableBuffer', ->
     for i in [0...buf.length]
       rb.append(buf[i])
     rb.clone().toString().should.eql 'hij,klm;'
+
+  describe 'toJSON', ->
+    rb = new ResizeableBuffer(5)
+    buf = Buffer.from 'abc,def;'
+    for i in [0...buf.length]
+      rb.append(buf[i])
+    rb.toJSON().should.eql 'abc,def;'
diff --git a/test/api.arguments.coffee b/test/api.arguments.coffee
index 8f9abd9..03f16b0 100644
--- a/test/api.arguments.coffee
+++ b/test/api.arguments.coffee
@@ -1,6 +1,7 @@
 
 generate = require 'csv-generate'
 parse = require '../lib'
+assert_error = require './api.assert_error'
 
 describe 'API arguments', ->
 
@@ -79,22 +80,30 @@ describe 'API arguments', ->
 
     it 'data:undefined, options:object', ->
       (->
        parse undefined, {}
-      ).should.throw 'Invalid argument: got undefined at index 0'
+      ).should.throw
+        message: 'Invalid argument: got undefined at index 0'
+        code: 'CSV_INVALID_ARGUMENT'
 
     it 'data:undefined, callback:function', ->
       (->
        parse undefined, (->)
-      ).should.throw 'Invalid argument: got undefined at index 0'
+      ).should.throw
+        message: 'Invalid argument: got undefined at index 0'
+        code: 'CSV_INVALID_ARGUMENT'
 
     it 'data:array, callback:function', ->
       (->
        parse ['value a,value b', 'value 1,value 2'], (->)
-      ).should.throw 'Invalid argument: got ["value a,value b","value 1,value 2"] at index 0'
+      ).should.throw
+        message: 'Invalid 
argument: got ["value a,value b","value 1,value 2"] at index 0' + code: 'CSV_INVALID_ARGUMENT' it 'options:object, options:object', -> (-> parse {}, {} - ).should.throw 'Invalid argument: got {} at index 1' + ).should.throw + message: 'Invalid argument: got {} at index 1' + code: 'CSV_INVALID_ARGUMENT' describe '3 args', -> @@ -111,10 +120,14 @@ describe 'API arguments', -> it 'data:undefined, options:object, callback:function', -> (-> parse undefined, columns: true, (->) - ).should.throw 'Invalid argument: got undefined at index 0' + ).should.throw + message: 'Invalid argument: got undefined at index 0' + code: 'CSV_INVALID_ARGUMENT' it 'data:string, options:object, callback:undefined', -> (-> parse 'field_1,field_2\nvalue 1,value 2', columns: true, undefined - ).should.throw 'Invalid argument: got undefined at index 2' + ).should.throw + message: 'Invalid argument: got undefined at index 2' + code: 'CSV_INVALID_ARGUMENT' diff --git a/test/api.assert_error.coffee b/test/api.assert_error.coffee new file mode 100644 index 0000000..3587369 --- /dev/null +++ b/test/api.assert_error.coffee @@ -0,0 +1,112 @@ + +{CsvError} = require '..' +ResizeableBuffer = require '../lib/ResizeableBuffer' + +module.exports = assert_error = (err, assert = {}, exhaustive = false) -> + if Array.isArray err + assert_error e, assert[i] for e, i in err + return + if exhaustive then for key, value of err + assert.should.have.keys(key) + err.should.be.an.Error() + for key, expect of assert + value = err[key] + if typeof expect is 'string' + # eg, convert a buffer + value = value.toString() if value?.toString? 
+ should(value).deepEqual expect + else if expect instanceof RegExp + should(value).match expect + else if expect is undefined + should(value).be.undefined() + else if expect is null + should(value).be.null() + else + should(value).deepEqual expect + +describe 'API assert_error', -> + + it 'work on array', -> + err = new CsvError 'A_MESSAGE', 'A message' + assert_error [err, err], [ + code: 'A_MESSAGE' + message: 'A message' + , + code: 'A_MESSAGE' + message: 'A message' + ] + + it 'exhaustive detect a property not in assert', -> + err = new CsvError 'A_MESSAGE', 'A message', a_key: 'a value' + ( -> + assert_error err, + code: 'A_MESSAGE' + message: 'A message' + , true + ).should.throw /expected Object .* to have key a_key/ + + it 'detect a property not in error', -> + err = new CsvError 'A_MESSAGE', 'A message' + ( -> + assert_error err, + code: 'A_MESSAGE' + message: 'A message' + a_key: 'a value' + ).should.throw "expected undefined to equal 'a value'" + + it 'validate a string value', -> + err = new CsvError 'A_MESSAGE', 'A message' + assert_error err, + code: 'A_MESSAGE' + message: 'A message' + ( -> + assert_error err, + code: 'A_MESSAGE' + message: 'Another mesage' + ).should.throw "expected 'A message' to equal 'Another mesage'" + + it 'validate a null value', -> + err = new CsvError 'A_MESSAGE', 'A message' + ( -> + assert_error err, + code: 'A_MESSAGE' + message: null + ).should.throw "expected 'A message' to be null" + + it 'validate a undefined value', -> + err = new CsvError 'A_MESSAGE', 'A message' + ( -> + assert_error err, + code: 'A_MESSAGE' + message: undefined + ).should.throw "expected 'A message' to be undefined" + + it 'validate a boolean true value', -> + err = new CsvError 'A_MESSAGE', 'A message', a_boolean: true + assert_error err, + a_boolean: true + ( -> + assert_error err, + a_boolean: false + ).should.throw "expected true to equal false" + + it 'validate a boolean true value', -> + err = new CsvError 'A_MESSAGE', 'A message', 
a_boolean: false + assert_error err, + a_boolean: false + ( -> + assert_error err, + a_boolean: true + ).should.throw "expected false to equal true" + + it 'validate a regexp value', -> + err = new CsvError 'A_MESSAGE', 'A message' + assert_error err, + code: 'A_MESSAGE' + message: /^A.*/ + ( -> + assert_error err, + code: 'A_MESSAGE' + message: /^Another.*/ + ).should.throw "expected 'A message' to match /^Another.*/" + diff --git a/test/api.destroy.coffee b/test/api.destroy.coffee new file mode 100644 index 0000000..44dd289 --- /dev/null +++ b/test/api.destroy.coffee @@ -0,0 +1,58 @@ + +parse = require '../lib' +generate = require 'csv-generate' +fs = require 'fs' +os = require 'os' + +describe 'API destroy', -> + + it 'inside readable with input string', (next) -> + parser = parse() + parser.on 'readable', -> + while this.read() + parser.destroy(Error 'Catch me') + parser.write """ + "ABC","45" + "DEF","23" + """ + parser.on 'error', (err) -> + err.message.should.eql 'Catch me' + parser._readableState.destroyed.should.be.true() + next() + parser.on 'end', -> + next Error 'End event shouldnt be called' + # Note, removing =nextTick trigger both the error and end events + process.nextTick -> + parser.end() + + it 'inside readable with fs input stream', (next) -> + fs.writeFile "#{os.tmpdir()}/data.csv", "a,b,c\n1,2,3", (err) -> + return next err if err + parser = parse() + parser.on 'readable', -> + while data = this.read() + parser.destroy(Error 'Catch me') + parser.on 'error', (err) -> + err.message.should.eql 'Catch me' + parser._readableState.destroyed.should.be.true() + next() + parser.on 'end', -> + next Error 'End event shouldnt be called' + fs + .createReadStream "#{os.tmpdir()}/data.csv" + .pipe parser + + it 'inside readable with generator input stream', (next) -> + # csv-generate emit data synchronously, it cant detect error on time + parser = parse() + parser.on 'readable', -> + while data = this.read() + parser.destroy(Error 'Catch me') + parser.on 
'error', (err) -> + err.message.should.eql 'Catch me' + parser._readableState.destroyed.should.be.true() + next() if /^v(14)\./.test process.version + parser.on 'end', -> + next() + generate length: 2, seed: 1, columns: 2, fixed_size: true + .pipe parser diff --git a/test/api.error.coffee b/test/api.error.coffee new file mode 100644 index 0000000..9ee7eeb --- /dev/null +++ b/test/api.error.coffee @@ -0,0 +1,33 @@ + +parse = require '..' +assert_error = require './api.assert_error' + +describe 'API error', -> + + it 'set code', -> + err = new parse.CsvError 'MY_CODE', ['a', 'b', 'c'] + err.code.should.eql 'MY_CODE' + + it 'convert array message to string', -> + err = new parse.CsvError 'MY_CODE', ['a', 'b', 'c'] + err.message.should.eql 'a b c' + + it 'set additional context information', -> + err = new parse.CsvError 'MY_CODE', 'msg', a: 1, b: 2 + err.a.should.eql 1 + err.b.should.eql 2 + + it 'errors are enriched by context', -> + parse 'a"b', (err) -> + assert_error err, + message: /Invalid Opening Quote/ + code: 'INVALID_OPENING_QUOTE' + column: 0 + empty_lines: 0 + header: false + index: 0 + invalid_field_length: 0 + quoting: false + lines: 1 + records: 0 + field: 'a' diff --git a/test/api.events.coffee b/test/api.events.coffee index 59bf11e..041bcd8 100644 --- a/test/api.events.coffee +++ b/test/api.events.coffee @@ -1,5 +1,6 @@ parse = require '../lib' +assert_error = require './api.assert_error' describe 'API events', -> @@ -44,7 +45,10 @@ describe 'API events', -> x " e f", x " g h" ''' parser = parse (err, data) -> - err.message.should.eql 'Invalid opening quote at line 1' + assert_error err, + message: 'Invalid Opening Quote: a quote is found inside a field at line 1' + code: 'INVALID_OPENING_QUOTE' + field: ' x ' next() parser.write chr for chr in data parser.end() @@ -63,3 +67,15 @@ describe 'API events', -> b,b """ parser.end() + + it 'emit error with data as argument', (next) -> + parser = parse """ + a,a,a + b,b + c,c,c + """ + parser.on 'end', -> + 
next Error 'End should not be fired' + parser.on 'error', (err) -> + err.message.should.eql 'Invalid Record Length: expect 3, got 2 on line 2' + next() diff --git a/test/api.sync.coffee b/test/api.sync.coffee index d532438..d2c9c56 100644 --- a/test/api.sync.coffee +++ b/test/api.sync.coffee @@ -22,6 +22,10 @@ describe 'API sync', -> 'name 2': {'field_1': 'name 2', 'field_2': 'value 2'} } + it 'honors to_line', -> + data = parse '1\n2\n3\n4', to_line: 2 + data.should.eql [ [ '1' ], [ '2' ] ] + it 'catch errors', -> try parse 'A,B\nB\nC,K', trim: true diff --git a/test/api.types.ts b/test/api.types.ts index 4ed1057..38a698c 100644 --- a/test/api.types.ts +++ b/test/api.types.ts @@ -1,13 +1,14 @@ import 'should' import * as parse from '../lib/index' -import {CastingContext, Info, Options, Parser} from '../lib/index' +import * as parse_sync from '../lib/sync' +import {CastingContext, Info, Options, Parser, CsvError} from '../lib/index' describe('API Types', () => { - describe('Parser', () => { + describe('stream/callback API', () => { - it('Respect parse signature', () =>{ + it('respect parse signature', () => { // No argument parse() parse("") @@ -26,21 +27,22 @@ describe('API Types', () => { it('Expose options', () => { const parser: Parser = parse() const options: Options = parser.options - const keys: any = Object.keys(options) + const keys: string[] = Object.keys(options) keys.sort().should.eql([ - 'bom', 'cast', 'cast_date', 'columns', 'comment', 'delimiter', - 'escape', 'from', 'from_line', 'info', 'ltrim', 'max_record_size', - 'objname', 'quote', 'raw', 'record_delimiter', - 'relax', 'relax_column_count', 'rtrim', 'skip_empty_lines', - 'skip_lines_with_empty_values', 'skip_lines_with_error', 'to', - 'to_line', 'trim' + 'bom', 'cast', 'cast_date', 'columns', 'columns_duplicates_to_array', + 'comment', 'delimiter', 'escape', 'from', 'from_line', 'info', 'ltrim', + 'max_record_size', 'objname', 'on_record', 'quote', 'raw', + 'record_delimiter', 'relax', 
'relax_column_count', + 'relax_column_count_less', 'relax_column_count_more', 'rtrim', + 'skip_empty_lines', 'skip_lines_with_empty_values', + 'skip_lines_with_error', 'to', 'to_line', 'trim' ]) }) it('Expose info', () => { const parser: Parser = parse() const info: Info = parser.info - const keys: any = Object.keys(info) + const keys: string[] = Object.keys(info) keys.sort().should.eql([ 'comment_lines', 'empty_lines', 'invalid_field_length', 'lines', 'records' @@ -59,6 +61,27 @@ describe('API Types', () => { }) + describe('sync api', () => { + + it('respect parse signature', () => { + // No argument + parse_sync("") + parse_sync("", {}) + parse_sync(Buffer.from("")) + parse_sync(Buffer.from(""), {}) + }) + + it('return records', () => { + try { + const data: object = parse_sync("") + typeof data + }catch (err){ + err.message + } + }) + + }) + describe('Info', () => { const fakeinfo = { @@ -109,6 +132,7 @@ describe('API Types', () => { it('cast_date', () => { const options: Options = {} options.cast_date = true + options.castDate = true }) it('columns', () => { @@ -128,6 +152,12 @@ describe('API Types', () => { } }) + it('columns_duplicates_to_array', () => { + const options: Options = {} + options.columns_duplicates_to_array = true + options.columnsDuplicatesToArray = true + }) + it('comment', () => { const options: Options = {} options.comment = '\\' @@ -136,6 +166,7 @@ describe('API Types', () => { it('delimiter', () => { const options: Options = {} options.delimiter = ':' + options.delimiter = [':', ')'] options.delimiter = Buffer.from(':') }) @@ -153,6 +184,7 @@ describe('API Types', () => { it('from_line', () => { const options: Options = {} options.from_line = 10 + options.fromLine = 10 }) it('info', () => { @@ -168,6 +200,7 @@ describe('API Types', () => { it('max_record_size', () => { const options: Options = {} options.max_record_size = 100 + options.maxRecordSize = 100 }) it('objname', () => { @@ -175,11 +208,20 @@ describe('API Types', () => { 
options.objname = 'name' }) + it('on_record', () => { + const options: Options = {} + options.on_record = (record, {lines}) => + [lines, record[0]] + options.onRecord = (record, {lines}) => + [lines, record[0]] + }) + it('quote', () => { const options: Options = {} options.quote = '"' options.quote = true options.quote = Buffer.from('"') + options.quote = null }) it('raw', () => { @@ -195,6 +237,11 @@ describe('API Types', () => { it('relax_column_count', () => { const options: Options = {} options.relax_column_count = true + options.relaxColumnCount = true + options.relax_column_count_less = true + options.relaxColumnCountLess = true + options.relax_column_count_more = true + options.relaxColumnCountMore = true }) it('record_delimiter', () => { @@ -203,6 +250,7 @@ describe('API Types', () => { options.record_delimiter = ['\n'] options.record_delimiter = Buffer.from('\n') options.record_delimiter = [Buffer.from('\n')] + options.recordDelimiter = '\n' }) it('rtrim', () => { @@ -213,21 +261,25 @@ describe('API Types', () => { it('skip_empty_lines', () => { const options: Options = {} options.skip_empty_lines = true + options.skipEmptyLines = true }) it('skip_empty_lines', () => { const options: Options = {} options.skip_empty_lines = true + options.skipEmptyLines = true }) it('skip_lines_with_empty_values', () => { const options: Options = {} options.skip_lines_with_empty_values = true + options.skipLinesWithEmptyValues = true }) it('skip_lines_with_error', () => { const options: Options = {} options.skip_empty_lines = true + options.skipEmptyLines = true }) it('to', () => { @@ -238,6 +290,7 @@ describe('API Types', () => { it('to_line', () => { const options: Options = {} options.to_line = 10 + options.toLine = 10 }) it('trim', () => { @@ -267,4 +320,37 @@ describe('API Types', () => { }) }) + describe('CsvError', () => { + describe('Typescript definition is accurate', () => { + it('Minimum', () => { + const error = new CsvError("CSV_INCONSISTENT_RECORD_LENGTH", 
"MESSAGE"); + + error.code.should.eql("CSV_INCONSISTENT_RECORD_LENGTH") + error.message.should.eql("MESSAGE") + }) + + it('Multiple messages', () => { + const error = new CsvError("CSV_INCONSISTENT_RECORD_LENGTH", ["MESSAGE1", "MESSAGE2"]) + + error.code.should.eql("CSV_INCONSISTENT_RECORD_LENGTH") + error.message.should.eql("MESSAGE1 MESSAGE2") + }) + + it('Supports contexts', () => { + const error = new CsvError("CSV_INCONSISTENT_RECORD_LENGTH", "MESSAGE", { testContext: { testProp: "testValue" } }) + + error.code.should.eql("CSV_INCONSISTENT_RECORD_LENGTH") + error.message.should.eql("MESSAGE") + error.should.have.key("testContext").and.eql({ testProp: "testValue" }) + }) + }) + + it('Proper type is thrown when an error is encountered', () => { + parse(`a,b\nc`, function (e: Error) { + const isCsvError = e instanceof CsvError; + isCsvError.should.be.true(); + (e as CsvError).code.should.eql('CSV_INCONSISTENT_RECORD_LENGTH'); + }) + }) + }) }) diff --git a/test/info.lines.coffee b/test/info.lines.coffee index e127aa5..3f5e6fc 100644 --- a/test/info.lines.coffee +++ b/test/info.lines.coffee @@ -1,5 +1,6 @@ parse = require '../lib' +assert_error = require './api.assert_error' describe 'properties lines', -> @@ -41,7 +42,10 @@ describe 'properties lines', -> this,"line",is,"invalid",h"ere" "and",valid,line,follows... """, (err, data) -> - err.message.should.match /Invalid opening quote at line 3/ + assert_error err, + message: 'Invalid Opening Quote: a quote is found inside a field at line 3' + code: 'INVALID_OPENING_QUOTE' + field: 'h' (data == undefined).should.be.true next() @@ -53,7 +57,10 @@ describe 'properties lines', -> this,"line",is,invalid h"ere" "and",valid,line,follows... 
""", skip_empty_lines: true, (err, data) -> - err.message.should.match /Invalid opening quote at line 4/ + assert_error err, + message: 'Invalid Opening Quote: a quote is found inside a field at line 4' + code: 'INVALID_OPENING_QUOTE' + field: 'invalid h' (data == undefined).should.be.true next() @@ -65,7 +72,9 @@ describe 'properties lines', -> "",1974,8.8392926E7,""," "",1974,8.8392926E7,"","" """, (err, data) -> - err.message.should.eql "Invalid Closing Quote: quote is not closed at line 5" + assert_error err, + message: 'Quote Not Closed: the parsing is finished with an opening quote at line 5' + code: 'CSV_QUOTE_NOT_CLOSED' (data == undefined).should.be.true next() @@ -77,7 +86,9 @@ describe 'properties lines', -> " 1974 8.8392926E7 ""t " " 1974 8.8392926E7 "t "" """, quote: '"', escape: '"', delimiter: "\t", (err, data) -> - err.message.should.eql 'Invalid Closing Quote: got " " at line 3 instead of delimiter, row delimiter, trimable character (if activated) or comment' + assert_error err, + message: 'Invalid Closing Quote: got " " at line 3 instead of delimiter, row delimiter, trimable character (if activated) or comment' + code: 'CSV_INVALID_CLOSING_QUOTE' (data == undefined).should.be.true next() @@ -90,6 +101,8 @@ describe 'properties lines', -> "",1974,8.8392926E7,"","" "",1974,8.8392926E7,""t,"" """, quote: '"', escape: '"', (err, data) -> - err.message.should.eql 'Invalid Closing Quote: got "t" at line 2 instead of delimiter, row delimiter, trimable character (if activated) or comment' + assert_error err, + message: 'Invalid Closing Quote: got "t" at line 2 instead of delimiter, row delimiter, trimable character (if activated) or comment' + code: 'CSV_INVALID_CLOSING_QUOTE' (data == undefined).should.be.true next() diff --git a/test/option.bom.coffee b/test/option.bom.coffee index e4c19c4..a7ef9ef 100644 --- a/test/option.bom.coffee +++ b/test/option.bom.coffee @@ -1,7 +1,15 @@ parse = require '../lib' +assert_error = require './api.assert_error' 
describe 'Option `bom`', -> + + it 'validate', -> + (-> + parse bom: 'ohno', ( -> ) + ).should.throw + message: 'Invalid option bom: bom must be true, got "ohno"' + code: 'CSV_INVALID_OPTION_BOM' it 'preserve bom if not defined', (next) -> parser = parse (err, data) -> @@ -25,9 +33,32 @@ describe 'Option `bom`', -> parser.write Buffer.from 'd,e,f' parser.end() + it 'with column option with bom `true`', (next) -> + parser = parse + columns: true + bom: true + , (err, records) -> + records[0]['key'].should.eql 'value' + next() + parser.write Buffer.from "\ufeffkey\nvalue" + parser.end() + + it 'with column option with bom `false`', (next) -> + parser = parse + columns: true + bom: false + , (err, records) -> + records[0]['\ufeffkey'].should.eql 'value' + next() + parser.write Buffer.from "\ufeffkey\nvalue" + parser.end() + it 'throw parsing error if quote follow bom', (next) -> parser = parse (err, data) -> - err.message.should.eql 'Invalid opening quote at line 1' + assert_error err, + message: 'Invalid Opening Quote: a quote is found inside a field at line 1' + code: 'INVALID_OPENING_QUOTE' + field: '\ufeff' next() parser.write Buffer.from "\ufeff\"a\",b,c\n" parser.write Buffer.from 'd,e,f' diff --git a/test/option.cast.coffee b/test/option.cast.coffee index 92fd564..1cc63df 100644 --- a/test/option.cast.coffee +++ b/test/option.cast.coffee @@ -1,8 +1,16 @@ parse = require '../lib' +assert_error = require './api.assert_error' describe 'Option `cast`', -> + it 'validate', -> + (-> + parse cast: 'ohno', ( -> ) + ).should.throw + message: 'Invalid option cast: cast must be true or a function, got "ohno"' + code: 'CSV_INVALID_OPTION_CAST' + describe 'boolean true', -> it 'all columns', (next) -> @@ -31,8 +39,14 @@ describe 'Option `cast`', -> parser.end() it 'ints', (next) -> - parse '123a,123,0123,', cast: true, (err, data) -> - data.should.eql [ ['123a', 123, 123, ''] ] + parse '123a,123,+123,-123,0123,+0123,-0123,', cast: true, (err, data) -> + data.should.eql [ 
['123a', 123, 123, -123, 123, 123, -123, ''] ] + next() + + it 'ints isnt exposed to DOS vulnerabilities, npm security issue 69742', (next) -> + data = Array.from( length: 3000000 ).map( (x) -> '1' ).join('') + '!' + parse data, cast: true, (err, data) -> + data[0][0].length.should.eql 3000001 next() it 'float', (next) -> @@ -54,11 +68,11 @@ describe 'Option `cast`', -> , (err, records) -> records.should.eql [ [ '2000-01-01T05:00:00.000Z', { - column: 1, empty_lines: 0, header: false, index: 1, + column: 1, empty_lines: 0, header: false, index: 1, invalid_field_length: 0, lines: 1, quoting: false, records: 0 } ] [ '2050-11-27T05:00:00.000Z', { - column: 1, empty_lines: 0, header: false, index: 1, + column: 1, empty_lines: 0, header: false, index: 1, invalid_field_length: 0, lines: 2, quoting: false, records: 1 } ] ] unless err @@ -83,6 +97,8 @@ describe 'Option `cast`', -> next err it 'dont call cast on unreferenced columns', (next) -> + # It doesn't make sense to cast value which cannot later be assigned + # to a column name in the returned object parse """ 1,2 3,4,5,6 @@ -90,7 +106,7 @@ describe 'Option `cast`', -> """, columns: ['a', 'b'] relax_column_count: true - cast: (value, {header, column}) -> + cast: (value, {column}) -> throw Error 'Oh no' if value > 4 and value < 7 , (err, records) -> next err @@ -213,7 +229,9 @@ describe 'Option `cast`', -> when 2 then if context.header then null else value when 3 then if context.header then 1234 else value , (err, data) -> - err.message.should.eql('Invalid Option columns: expect a string or an object, got 1234 at position 3') + assert_error err, + message: 'Invalid column definition: expect a string or a literal object, got 1234 at position 3' + code: 'CSV_INVALID_COLUMN_DEFINITION' next() describe 'error', -> diff --git a/test/option.cast_date.coffee b/test/option.cast_date.coffee index 01423f2..1e4ddf0 100644 --- a/test/option.cast_date.coffee +++ b/test/option.cast_date.coffee @@ -1,8 +1,16 @@ parse = require 
'../lib' +assert_error = require './api.assert_error' describe 'Option `cast_date`', -> + it 'validate', -> + (-> + parse cast: true, cast_date: 'ohno', ( -> ) + ).should.throw + message: 'Invalid option cast_date: cast_date must be true or a function, got "ohno"' + code: 'CSV_INVALID_OPTION_CAST_DATE' + it 'true', (next) -> data = [] parser = parse """ diff --git a/test/option.columns.coffee b/test/option.columns.coffee index b88157d..497e30e 100644 --- a/test/option.columns.coffee +++ b/test/option.columns.coffee @@ -1,5 +1,6 @@ parse = require '../lib' +assert_error = require './api.assert_error' describe 'Option `columns`', -> @@ -12,12 +13,23 @@ describe 'Option `columns`', -> },{ invalid: 'oh no' }], (->) - ).should.throw 'Invalid Option columns: property "name" is required at position 1 when column is an object literal' + ).should.throw + message: 'Option columns missing name: property "name" is required at position 1 when column is an object literal' + code: 'CSV_OPTION_COLUMNS_MISSING_NAME' it 'check the columns value', -> (-> parse "", columns: [{name: 'valid'}, true], (->) - ).should.throw 'Invalid Option columns: expect a string or an object, got true at position 1' + ).should.throw + message: 'Invalid column definition: expect a string or a literal object, got true at position 1' + code: 'CSV_INVALID_COLUMN_DEFINITION' + + it 'check the columns value', -> + (-> + parse "", columns: {}, (->) + ).should.throw + message: 'Invalid option columns: expect an object, a function or true, got {}' + code: 'CSV_INVALID_OPTION_COLUMNS' it 'skip columns with false value', (next) -> parse """ @@ -74,6 +86,28 @@ describe 'Option `columns`', -> ['d', 'e', 'f'] ] unless err next err + + it 'header detection honors skip_empty_lines', (next) -> + parse """ + + a,b,c + 1,2,3 + """, columns: true, skip_empty_lines: true, (err, data) -> + data.should.eql [ + {a: "1", b: "2", c: "3"} + ] unless err + next err + + it 'header detection honors skip_lines_with_empty_values', 
(next) -> + parse """ + ,, + a,b,c + 1,2,3 + """, columns: true, skip_lines_with_empty_values: true, (err, data) -> + data.should.eql [ + {a: "1", b: "2", c: "3"} + ] unless err + next err describe 'boolean array', -> @@ -105,7 +139,9 @@ describe 'Option `columns`', -> 4,5,6,x 7,8,9,x """, columns: ["a", "b", "c", "d"], (err, data) -> - err.message.should.eql 'Invalid Record Length: header length is 4, got 3 on line 1' + assert_error err, + message: 'Invalid Record Length: columns length is 4, got 3 on line 1' + code: 'CSV_RECORD_DONT_MATCH_COLUMNS_LENGTH' next() it 'validate options column length on last line', (next) -> @@ -114,7 +150,29 @@ describe 'Option `columns`', -> 4,5,6,x 7,8,9 """, columns: ["a", "b", "c", "d"], (err, data) -> - err.message.should.eql 'Invalid Record Length: header length is 4, got 3 on line 3' + assert_error err, + message: 'Invalid Record Length: columns length is 4, got 3 on line 3' + code: 'CSV_RECORD_DONT_MATCH_COLUMNS_LENGTH' + next() + + it 'context column is null when cast force the context creation', (next) -> + # Trigger cast to force the creation of a context + parse "a\nb,\n", + columns: true + cast: (value) -> value + , (err, data) -> + assert_error err, + message: 'Invalid Record Length: columns length is 1, got 2 on line 2' + code: 'CSV_RECORD_DONT_MATCH_COLUMNS_LENGTH' + column: null + next() + + it 'context column is null when columns number inferieur to record length, fix regression #259', (next) -> + parse "a\nb,\n", columns: true, (err, data) -> + assert_error err, + message: 'Invalid Record Length: columns length is 1, got 2 on line 2' + code: 'CSV_RECORD_DONT_MATCH_COLUMNS_LENGTH' + column: null next() it 'skips column names defined as undefined', (next) -> @@ -168,6 +226,19 @@ describe 'Option `columns`', -> { a: '3' } ] unless err next err + + it '', (next) -> + # Trigger a bug where error is try to stringify and parse an undefined + # value, conjointly triggered by a null column and a + # 
CSV_RECORD_DONT_MATCH_COLUMNS_LENGTH error + parse """ + col_a,col_b,col_c + foo,bar + foo,bar,baz + """ + , columns: ['a', 'b', null], (err, data) -> + err.code.should.eql 'CSV_RECORD_DONT_MATCH_COLUMNS_LENGTH' + next() describe 'function', -> @@ -204,6 +275,19 @@ describe 'Option `columns`', -> hij,789,klm,0 """, columns: (columns) -> throw Error 'Catchme' - , (err, data) -> + , (err) -> err.message.should.eql 'Catchme' next() + + it 'must return an array of headers', (next) -> + parse """ + FIELD_1 + abc + """, columns: (columns) -> + return {FIELD: true} + , (err) -> + assert_error err, + message: 'Invalid Column Mapping: expect an array from column function, got {"FIELD":true}' + code: 'CSV_INVALID_COLUMN_MAPPING' + headers: FIELD: true + next() diff --git a/test/option.columns_duplicates_to_array.coffee b/test/option.columns_duplicates_to_array.coffee new file mode 100644 index 0000000..00d6428 --- /dev/null +++ b/test/option.columns_duplicates_to_array.coffee @@ -0,0 +1,42 @@ + +parse = require '../lib' +assert_error = require './api.assert_error' + +describe 'Option `columns_duplicates_to_array`', -> + + it 'validate', -> + (-> + parse "", columns_duplicates_to_array: 'invalid' + ).should.throw + code: 'CSV_INVALID_OPTION_COLUMNS_DUPLICATES_TO_ARRAY' + message: [ + 'Invalid option columns_duplicates_to_array:', + 'expect an boolean, got "invalid"' + ].join ' ' + + + it 'when false', (next) -> + parse """ + FIELD_1,FIELD_1 + ABC,DEF + GHI,JKL + """, columns: true, columns_duplicates_to_array: false, (err, data) -> + data.should.eql [ + 'FIELD_1': 'DEF' + , + 'FIELD_1': 'JKL' + ] unless err + next err + + it 'when true', (next) -> + parse """ + FIELD_1,FIELD_1 + ABC,DEF + GHI,JKL + """, columns: true, columns_duplicates_to_array: true, (err, data) -> + data.should.eql [ + 'FIELD_1': ['ABC', 'DEF'] + , + 'FIELD_1': ['GHI', 'JKL'] + ] unless err + next err diff --git a/test/option.comment.coffee b/test/option.comment.coffee index d0f1b6c..b56c803 100644 --- 
a/test/option.comment.coffee +++ b/test/option.comment.coffee @@ -10,10 +10,14 @@ describe 'Option `comment`', -> parse '', comment: '', (->) (-> parse '', comment: true, (->) - ).should.throw 'Invalid Option: comment must be a buffer or a string, got true' + ).should.throw + message: 'Invalid option comment: comment must be a buffer or a string, got true' + code: 'CSV_INVALID_OPTION_COMMENT' (-> parse '', comment: 2, (->) - ).should.throw 'Invalid Option: comment must be a buffer or a string, got 2' + ).should.throw + message: 'Invalid option comment: comment must be a buffer or a string, got 2' + code: 'CSV_INVALID_OPTION_COMMENT' it 'single comment line', (next) -> parse '# comment', comment: '#', (err, data) -> @@ -22,7 +26,7 @@ describe 'Option `comment`', -> it 'single comment line with empty field', (next) -> parse '""# comment', comment: '#', (err, data) -> - data.length.should.eql 1 + data.should.eql [['']] next err it 'skip line starting by single comment char', (next) -> diff --git a/test/option.delimiter.coffee b/test/option.delimiter.coffee index 98f1afd..fe16cc8 100644 --- a/test/option.delimiter.coffee +++ b/test/option.delimiter.coffee @@ -8,13 +8,34 @@ describe 'Option `delimiter`', -> parse '', delimiter: Buffer.from(','), (->) (-> parse '', delimiter: '', (->) - ).should.throw 'Invalid Option: delimiter must be a non empty string' + ).should.throw + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got ""' + code: 'CSV_INVALID_OPTION_DELIMITER' (-> parse '', delimiter: Buffer.from(''), (->) - ).should.throw 'Invalid Option: delimiter must be a non empty buffer' + ).should.throw + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got {"type":"Buffer","data":[]}' + code: 'CSV_INVALID_OPTION_DELIMITER' (-> parse '', delimiter: true, (->) - ).should.throw 'Invalid Option: delimiter must be a string or a buffer, got true' + 
).should.throw + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got true' + code: 'CSV_INVALID_OPTION_DELIMITER' + (-> + parse '', delimiter: [], (->) + ).should.throw + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got []' + code: 'CSV_INVALID_OPTION_DELIMITER' + (-> + parse '', delimiter: [''], (->) + ).should.throw + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got [""]' + code: 'CSV_INVALID_OPTION_DELIMITER' + (-> + parse '', delimiter: [',',''], (->) + ).should.throw + message: 'Invalid option delimiter: delimiter must be a non empty string or buffer or array of string|buffer, got [",",""]' + code: 'CSV_INVALID_OPTION_DELIMITER' it 'using default comma', (next) -> parse """ @@ -63,3 +84,39 @@ describe 'Option `delimiter`', -> [ '','1974','8.8392926E7','',''] ] next() + + it 'using array of a single delimiter', (next) -> + parse """ + abc,,123, + ,def,, + """, delimiter: [','], (err, data) -> + return next err if err + data.should.eql [ + [ 'abc','','123',''] + [ '','def','',''] + ] + next() + + it 'using array of a single delimiter of multiple characters', (next) -> + parse """ + !# + !# + """, delimiter: ['!#'], (err, data) -> + return next err if err + data.should.eql [ + [ '', ''] + [ '', ''] + ] + next() + + it 'using array of a multiple delimiters of variable length', (next) -> + parse """ + abc,;;123;; + ;;def;;, + """, delimiter: [',', ';;'], (err, data) -> + return next err if err + data.should.eql [ + [ 'abc','','123',''] + [ '','def','',''] + ] + next() diff --git a/test/option.escape.coffee b/test/option.escape.coffee index f8ea447..3705140 100644 --- a/test/option.escape.coffee +++ b/test/option.escape.coffee @@ -2,19 +2,43 @@ parse = require '../lib' describe 'Option `escape`', -> + + describe 'normalisation, coercion & validation', -> + + it 'default', -> + 
parse().options.escape.should.eql Buffer.from('"')[0] + parse(escape: undefined).options.escape.should.eql Buffer.from('"')[0] + parse(escape: true).options.escape.should.eql Buffer.from('"')[0] + + it 'custom', -> + parse(escape: '\\').options.escape.should.eql Buffer.from('\\')[0] + parse(escape: Buffer.from('\\')).options.escape.should.eql Buffer.from('\\')[0] - it 'validation', -> - parse '', escape: '\\', (->) - parse '', escape: Buffer.from('\\'), (->) - parse '', escape: null, (->) - parse '', escape: undefined, (->) - (-> - parse '', escape: false, (->) - ).should.throw 'Invalid Option: escape must be a buffer or a string, got false' - (-> - parse '', escape: true, (->) - ).should.throw 'Invalid Option: escape must be a buffer or a string, got true' + it 'disabled', -> + (parse(escape: null).options.escape is null).should.be.true() + (parse(escape: false).options.escape is null).should.be.true() + it 'invalid', -> + (-> + parse escape: 1 + ).should.throw 'Invalid Option: escape must be a buffer, a string or a boolean, got 1' + (-> + parse escape: 'abc' + ).should.throw 'Invalid Option Length: escape must be one character, got 3' + + describe 'disabled', -> + + it 'when null', (next) -> + parse ''' + a"b + '1"2' + ''', escape: null, quote: '\'', (err, data) -> + return next err if err + data.should.eql [ + [ 'a"b' ],[ '1"2' ] + ] + next() + describe 'same as quote', -> it 'is same as quote', (next) -> @@ -28,7 +52,7 @@ describe 'Option `escape`', -> [ 'f"g','h','i1"i2' ] ] next() - + describe 'different than quote', -> it 'apply to quote char', (next) -> @@ -67,6 +91,16 @@ describe 'Option `escape`', -> ] next() + it 'does not apply to delimiter', (next) -> + parse ''' + aa\\,bb + ''', escape: '\\', (err, data) -> + return next err if err + data.should.eql [ + [ 'aa\\','bb' ] + ] + next() + it 'handle non continuous chunks', (next) -> data = [] parser = parse escape: '\\' diff --git a/test/option.from_line.coffee b/test/option.from_line.coffee index 
88461fe..c3cf883 100644 --- a/test/option.from_line.coffee +++ b/test/option.from_line.coffee @@ -37,19 +37,16 @@ describe 'Option `from_line`', -> [ '7','8','9' ] ] unless err next err - - it 'count headers', (next) -> - parse """ - a,b,c - 1,2,3 - 4,5,6 - 7,8,9 - """, columns: true, from_line: 3, (err, data) -> - data.should.eql [ - {a: '4',b: '5',c: '6'} - {a: '7',b: '8',c: '9'} - ] unless err - next err + + it 'handle lines with inconsistent number of fields', (next) -> + parse """ + a + 1,2,3 + """, from_line: 2, (err, data) -> + data.should.eql [ + ['1', '2', '3'] + ] unless err + next err it 'records with quoted line at the begining of line', (next) -> parse """ @@ -105,3 +102,13 @@ describe 'Option `from_line`', -> [ 'e','f' ] ] unless err next err + + it 'honors header', (next) -> + parse """ + x,y,z + x,y,z + a,b,c + 4,5,6 + """, from_line: 3, columns: true, (err, data) -> + data.should.eql [{ a: '4', b: '5', c: '6' }] unless err + next err diff --git a/test/option.ltrim.coffee b/test/option.ltrim.coffee index 016bdc7..6139ee8 100644 --- a/test/option.ltrim.coffee +++ b/test/option.ltrim.coffee @@ -1,5 +1,6 @@ parse = require '../lib' +assert_error = require './api.assert_error' describe 'Option `ltrim`', -> @@ -51,7 +52,10 @@ describe 'Option `ltrim`', -> x " e f", x " g h" ''' parser = parse ltrim: true, (err, data) -> - err.message.should.eql 'Invalid opening quote at line 1' + assert_error err, + message: 'Invalid Opening Quote: a quote is found inside a field at line 1' + code: 'INVALID_OPENING_QUOTE' + field: 'x ' next() parser.write chr for chr in data parser.end() diff --git a/test/option.max_record_size.coffee b/test/option.max_record_size.coffee index 1c28ac8..9663f72 100644 --- a/test/option.max_record_size.coffee +++ b/test/option.max_record_size.coffee @@ -1,5 +1,6 @@ parse = require '../lib' +assert_error = require './api.assert_error' describe 'Option `max_record_size`', -> @@ -23,5 +24,9 @@ describe 'Option `max_record_size`', -> 
hi,xxxxxxxxxxxxxxx,jk lm,no,pq ''', max_record_size: 10, (err) -> - err.message.should.eql 'Max Record Size: record exceed the maximum number of tolerated bytes of 10 on line 3' + assert_error err, + message: 'Max Record Size: record exceed the maximum number of tolerated bytes of 10 at line 3' + code: 'CSV_MAX_RECORD_SIZE' + column: 1, empty_lines: 0, header: false, index: 1, invalid_field_length: 0, + quoting: false, lines: 3, records: 2 next() diff --git a/test/option.on_record.coffee b/test/option.on_record.coffee new file mode 100644 index 0000000..00543fd --- /dev/null +++ b/test/option.on_record.coffee @@ -0,0 +1,51 @@ + +parse = require '../lib' +assert_error = require './api.assert_error' + +describe 'Option `on_record`', -> + + it 'validate', -> + (-> + parse on_record: true + ).should.throw + message: 'Invalid option `on_record`: expect a function, got true' + code: 'CSV_INVALID_OPTION_ON_RECORD' + + it 'alter records', (next) -> + parse "a,b", on_record: (record) -> + [record[1], record[0]] + , (err, records) -> + records.should.eql [ ['b', 'a'] ] unless err + next err + + it 'filter records', (next) -> + parse "a,b\nc,d\ne,f", on_record: (record, {lines}) -> + if lines is 2 then null else record + , (err, records) -> + records.should.eql [ ['a', 'b'], ['e', 'f'] ] unless err + next err + + it 'errors with callback', (next) -> + parse "a,b\nc,d\ne,f", + on_record: (record, {lines}) -> + if lines is 2 then throw Error 'Error thrown on line 2' else record + , (err, records) -> + err.message.should.eql 'Error thrown on line 2' + next() + + it 'errors with events', (next) -> + parser = parse "a,a,a\nc,d\ne,f" + parser.on 'error', (err) -> + err.message.should.eql 'Invalid Record Length: expect 3, got 2 on line 2' + next() + parser.on 'end', () -> + next Error 'Should not be called' + + it 'errors not handled by skip_lines_with_error', (next) -> + parse "a,b\nc,d\ne,f", + on_record: (record, {lines}) -> + if lines is 2 then throw Error 'Error thrown on line 
2' else record + skip_lines_with_error: true + , (err, records) -> + err.message.should.eql 'Error thrown on line 2' + next() diff --git a/test/option.quote.coffee b/test/option.quote.coffee index 622f15f..5090f46 100644 --- a/test/option.quote.coffee +++ b/test/option.quote.coffee @@ -1,5 +1,6 @@ parse = require '../lib' +assert_error = require './api.assert_error' describe 'Option `quote`', -> @@ -125,20 +126,34 @@ describe 'Option `quote`', -> data.should.eql [['a','b','c'],['1','r"2"d"2"','3']] unless err next err + describe 'with options', -> + + it 'columns', (next) -> + parse """ + a,"b",c + 1,"2",3 + """, quote: true, columns: true, (err, data) -> + data.should.eql [ { a: '1', b: '2', c: '3' } ] unless err + next err + describe 'error "Quoted field not terminated"', -> it 'when unclosed', (next) -> parse """ "",1974,8.8392926E7,""," """, (err, data) -> - err.message.should.eql 'Invalid Closing Quote: quote is not closed at line 1' + assert_error err, + message: 'Quote Not Closed: the parsing is finished with an opening quote at line 1' + code: 'CSV_QUOTE_NOT_CLOSED' next() describe 'error "Invalid Closing Quote"', -> it 'when followed by a character', (next) -> parse '""!', quote: '"', escape: '"', (err) -> - err.message.should.eql 'Invalid Closing Quote: got "!" at line 1 instead of delimiter, row delimiter, trimable character (if activated) or comment' + assert_error err, + message: 'Invalid Closing Quote: got "!" at line 1 instead of delimiter, row delimiter, trimable character (if activated) or comment' + code: 'CSV_INVALID_CLOSING_QUOTE' next() it 'no throw followed by a comment', (next) -> @@ -166,7 +181,10 @@ describe 'Option `quote`', -> this,"line",is,invalid h"ere" "and",valid,line,follows... 
""", (err, data) -> - err.message.should.eql 'Invalid opening quote at line 3' + assert_error err, + message: 'Invalid Opening Quote: a quote is found inside a field at line 3' + code: 'INVALID_OPENING_QUOTE' + field: 'invalid h' (data == undefined).should.be.true next() diff --git a/test/option.relax.coffee b/test/option.relax.coffee index a010eb5..955957a 100644 --- a/test/option.relax.coffee +++ b/test/option.relax.coffee @@ -1,5 +1,6 @@ parse = require '../lib' +assert_error = require './api.assert_error' describe 'Option `relax`', -> @@ -31,25 +32,21 @@ describe 'Option `relax`', -> parse """ 384682,the "SAMAY" Hostel,Jiron Florida 285 """, relax: false, (err, data) -> - err.message.should.eql 'Invalid opening quote at line 1' + assert_error err, + message: 'Invalid Opening Quote: a quote is found inside a field at line 1' + code: 'INVALID_OPENING_QUOTE' + field: 'the ' next() it 'true with invalid quotes on the left', (next) -> - # try with relax true - # parse """ - # 384682,"SAMAY" Hostel,Jiron Florida 285 - # """, relax: true, (err, data) -> - # return next err if err - # data.should.eql [ - # [ '384682', '"SAMAY" Hostel', 'Jiron Florida 285' ] - # ] - # next() parse """ a,"b" c,d + a,""b" c,d """, relax: true, (err, data) -> return next err if err data.should.eql [ [ 'a', '"b" c', 'd' ] + [ 'a', '""b" c', 'd' ] ] next() @@ -57,49 +54,58 @@ describe 'Option `relax`', -> # transform is throwing instead of emiting error, skipping for now i = 0 parse """ - 384682,"SAMAY" Hostel,Jiron Florida 285 + a,"b" c,d """, relax: false, (err, data) -> - err.message.should.eql 'Invalid Closing Quote: got " " at line 1 instead of delimiter, row delimiter, trimable character (if activated) or comment' - next() + assert_error err, + message: 'Invalid Closing Quote: got " " at line 1 instead of delimiter, row delimiter, trimable character (if activated) or comment' + code: 'CSV_INVALID_CLOSING_QUOTE' + next() it 'true with two invalid quotes on the left', (next) -> # try 
with relax true parse """ - 384682,""SAMAY"" Hostel,Jiron Florida 285 + a,""b"" c,d """, relax: true, (err, data) -> return next err if err data.should.eql [ - [ '384682', '""SAMAY"" Hostel', 'Jiron Florida 285' ] + [ 'a', '""b"" c', 'd' ] ] unless err next err it 'false with two invalid quotes on the left', (next) -> # try with relax false parse """ - 384682,""SAMAY"" Hostel,Jiron Florida 285 + a,""b"" c,d """, relax: false, (err, data) -> # Change of implementation in version 4, was - err.message.should.eql 'Invalid Closing Quote: got "S" at line 1 instead of delimiter, row delimiter, trimable character (if activated) or comment' # data.should.eql [ - # [ '384682', '"SAMAY" Hostel', 'Jiron Florida 285' ] + # [ 'a', '"b" c', 'd' ] # ] unless err + assert_error err, + message: 'Invalid Closing Quote: got "b" at line 1 instead of delimiter, row delimiter, trimable character (if activated) or comment' + code: 'CSV_INVALID_CLOSING_QUOTE' next() it 'true with invalid quotes on the right', (next) -> # TODO: we need to decide the strategy we want here parse """ - 384682,SAMAY Hostel,Jiron "Florida 285" + a,b "c",d + Bob"","23",e """, relax: true, (err, data) -> return next err if err data.should.eql [ - [ '384682', 'SAMAY Hostel', 'Jiron "Florida 285"' ] + [ 'a', 'b "c"', 'd' ] + [ 'Bob""', '23','e' ] ] next() it 'false with invalid quotes on the right', (next) -> # transform is throwing instead of emiting error, skipping for now parse """ - 384682,SAMAY Hostel,Jiron "Florida 285" + a,b "c" """, relax: false, (err, data) -> - err.message.should.eql 'Invalid opening quote at line 1' + assert_error err, + message: 'Invalid Opening Quote: a quote is found inside a field at line 1' + code: 'INVALID_OPENING_QUOTE' + field: 'b ' next() diff --git a/test/option.relax_column_count.coffee b/test/option.relax_column_count.coffee index 6942287..9dc662a 100644 --- a/test/option.relax_column_count.coffee +++ b/test/option.relax_column_count.coffee @@ -1,5 +1,6 @@ parse = require 
'../lib' +assert_error = require './api.assert_error' describe 'Option `relax_column_count`', -> @@ -20,7 +21,10 @@ describe 'Option `relax_column_count`', -> 1,2,3 4,5 """, (err, data) -> - err.message.should.eql 'Invalid Record Length: expect 3, got 2 on line 2' + assert_error err, + code: 'CSV_INCONSISTENT_RECORD_LENGTH' + message: 'Invalid Record Length: expect 3, got 2 on line 2' + record: ['4', '5'] next() it 'emit single error when column count is invalid on multiple lines', (next) -> @@ -31,8 +35,11 @@ describe 'Option `relax_column_count`', -> 5,6,7 """ , (err, data) -> - err.message.should.eql 'Invalid Record Length: expect 2, got 1 on line 2' - process.nextTick next + assert_error err, + code: 'CSV_INCONSISTENT_RECORD_LENGTH' + message: 'Invalid Record Length: expect 2, got 1 on line 2' + record: ['1'] + next() it 'dont throw error if true', (next) -> parse """ @@ -79,3 +86,52 @@ describe 'Option `relax_column_count`', -> { "a":"9", "b":"10" } ] unless err next err + + describe 'relax_column_count_more', -> + + it 'when more', (next) -> + parse """ + 1,2,3 + a,b,c,d + """, relax_column_count_more: true, (err, data) -> + data.should.eql [ + ['1', '2', '3'] + ['a', 'b', 'c', 'd'] + ] unless err + next err + + it 'when less', (next) -> + parse """ + 1,2,3 + a,b + """, relax_column_count_more: true, (err, data) -> + assert_error err, + code: 'CSV_INCONSISTENT_RECORD_LENGTH' + message: 'Invalid Record Length: expect 3, got 2 on line 2' + record: ['a', 'b'] + next() + + describe 'relax_column_count_less', -> + + it 'when less', (next) -> + parse """ + 1,2,3 + a,b + """, relax_column_count_less: true, (err, data) -> + data.should.eql [ + ['1', '2', '3'] + ['a', 'b'] + ] unless err + next err + + it 'when more', (next) -> + parse """ + 1,2,3 + a,b,c,d + """, relax_column_count_less: true, (err, data) -> + assert_error err, + code: 'CSV_INCONSISTENT_RECORD_LENGTH' + message: 'Invalid Record Length: expect 3, got 4 on line 2' + record: ['a', 'b', 'c', 'd'] + 
next() + diff --git a/test/option.rtrim.coffee b/test/option.rtrim.coffee index 25e8216..5040b45 100644 --- a/test/option.rtrim.coffee +++ b/test/option.rtrim.coffee @@ -1,5 +1,6 @@ parse = require '../lib' +assert_error = require './api.assert_error' describe 'Option `rtrim`', -> @@ -57,12 +58,16 @@ describe 'Option `rtrim`', -> parser.end() it 'with char after whitespaces', (next) -> - data = ''' - "a b " x ,"c d " x - "e f " x,"g h " x - ''' + data = [ + '"a b " x ,"c d " x' + '"e f " x,"g h " x ' + ].join '\n' parser = parse rtrim: true, (err, data) -> - err.message.should.eql 'Invalid Closing Quote: found non trimable byte after quote at line 1' + assert_error err, + message: 'Invalid Closing Quote: found non trimable byte after quote at line 1' + code: 'CSV_NON_TRIMABLE_CHAR_AFTER_CLOSING_QUOTE' + column: 0, empty_lines: 0, header: false, index: 0, invalid_field_length: 0, + quoting: true, lines: 1, records: 0 next() parser.write chr for chr in data parser.end() diff --git a/test/option.skip_lines_with_empty_values.coffee b/test/option.skip_lines_with_empty_values.coffee index 8ac728f..f8b203d 100644 --- a/test/option.skip_lines_with_empty_values.coffee +++ b/test/option.skip_lines_with_empty_values.coffee @@ -56,3 +56,28 @@ describe 'Option `skip_lines_with_empty_values`', -> [ 'IJK', 'LMN' ] ] next() + + it 'handle value which are casted to another type than string', (next) -> + parse """ + empty_buffer + boolean + integer + null + undefined + """, + skip_lines_with_empty_values: true + cast: (value) -> + switch value + when 'empty_buffer' then Buffer.from '' + when 'boolean' then true + when 'integer' then 0 + when 'null' then null + when 'undefined' then undefined + else value + , (err, data) -> + return next err if err + data.should.eql [ + [ true ] + [ 0 ] + ] + next() diff --git a/test/option.skip_lines_with_error.coffee b/test/option.skip_lines_with_error.coffee index 9be7636..69e9db5 100644 --- a/test/option.skip_lines_with_error.coffee +++ 
b/test/option.skip_lines_with_error.coffee @@ -1,5 +1,6 @@ parse = require '../lib' +assert_error = require './api.assert_error' describe 'Option `skip_lines_with_error`', -> @@ -16,17 +17,20 @@ describe 'Option `skip_lines_with_error`', -> ).should.throw 'Invalid Option: skip_lines_with_error must be a boolean, got "oh no"' it 'handle "Invalid closing quote"', (next) -> - skip = null + errors = 0 parser = parse skip_lines_with_error: true, (err, data) -> data.should.eql [ ["a","b","c"] ["one","two","three"] ["seven","eight","nine"] ] unless err - skip.message.should.eql 'Invalid Closing Quote: got " " at line 3 instead of delimiter, row delimiter, trimable character (if activated) or comment' unless err + errors.should.eql 1 next err parser.on 'skip', (err) -> - skip = err + assert_error err, + message: 'Invalid Closing Quote: got " " at line 3 instead of delimiter, row delimiter, trimable character (if activated) or comment' + code: 'CSV_INVALID_CLOSING_QUOTE' + errors++ parser.write ''' "a","b","c" "one","two","three" @@ -36,15 +40,25 @@ describe 'Option `skip_lines_with_error`', -> parser.end() it 'handle "Invalid opening quote"', (next) -> - skip = null + errors = [] parser = parse skip_lines_with_error: true, (err, data) -> data.should.eql [ ["line","1"] ["line", "3"] ] unless err - skip.message.should.match 'Invalid opening quote at line 2' unless err + assert_error errors, [ + message: 'Invalid Opening Quote: a quote is found inside a field at line 2' + code: 'INVALID_OPENING_QUOTE' + field: 'invalid h' + , + message: 'Invalid Opening Quote: a quote is found inside a field at line 2' + code: 'INVALID_OPENING_QUOTE' + field: 'invalid h"ere' + ] + errors.length.should.eql 2 next err - parser.on 'skip', (err) -> skip = err + parser.on 'skip', (err, context) -> + errors.push err parser.write ''' "line",1 "line",invalid h"ere" @@ -53,31 +67,39 @@ describe 'Option `skip_lines_with_error`', -> parser.end() it 'handle "Quoted field not terminated"', (next) -> - 
skip = null + errors = 0 parser = parse skip_lines_with_error: true, (err, data) -> data.should.eql [ ['a', 'b', 'c', 'd'] ] unless err - skip.message.should.match 'Invalid Closing Quote: quote is not closed at line 2' unless err + errors.should.eql 1 next err parser.on 'skip', (err) -> - skip = err + assert_error err, + message: 'Quote Not Closed: the parsing is finished with an opening quote at line 2' + code: 'CSV_QUOTE_NOT_CLOSED' + errors++ parser.write ''' "a",b,"c",d "",1974,8.8392926E7,""," ''' parser.end() - it 'handle "Number of columns is inconsistent"', (next) -> - skip = null + it 'handle "CSV_RECORD_DONT_MATCH_COLUMNS_LENGTH"', (next) -> + errors = 0 parser = parse skip_lines_with_error: true, columns: ["a", "b", "c", "d"], (err, data) -> data.should.eql [ { a: '4', b: '5', c: '6', d: 'x'} { a: '7', b: '8', c: '9', d: 'y'} ] unless err - skip.message.should.match 'Invalid Record Length: header length is 4, got 3 on line 1' unless err + errors.should.eql 1 next err - parser.on 'skip', (err) -> skip = err + parser.on 'skip', (err) -> + assert_error err, + message: 'Invalid Record Length: columns length is 4, got 3 on line 1' + code: 'CSV_RECORD_DONT_MATCH_COLUMNS_LENGTH' + record: ['1', '2', '3'] + errors++ parser.write ''' 1,2,3 4,5,6,x @@ -86,17 +108,21 @@ describe 'Option `skip_lines_with_error`', -> parser.end() - it 'handle "Invalid Record Length"', (next) -> - skip = null + it 'handle "CSV_INCONSISTENT_RECORD_LENGTH"', (next) -> + errors = 0 parser = parse skip_lines_with_error: true, (err, data) -> data.should.eql [ ['a', 'b', 'c', 'd'] ['e', 'f', 'g', 'h'] ] unless err - skip.message.should.match 'Invalid Record Length: expect 4, got 3 on line 2' unless err + errors.should.eql 1 next err parser.on 'skip', (err) -> - skip = err + assert_error err, + message: 'Invalid Record Length: expect 4, got 3 on line 2' + code: 'CSV_INCONSISTENT_RECORD_LENGTH' + record: ['1', '2', '3'] + errors++ parser.write ''' a,b,c,d 1,2,3 diff --git 
a/test/option.trim.coffee b/test/option.trim.coffee index 5c08699..8ff4372 100644 --- a/test/option.trim.coffee +++ b/test/option.trim.coffee @@ -58,6 +58,18 @@ describe 'Option `trim`', -> it 'respect rtrim', -> parser = parse trim: true, rtrim: false parser.options.rtrim.should.be.false() + + it 'interpret whitespaces', (next) -> + parse [ + String.fromCharCode 9 # Horizontal tab + String.fromCharCode 10 # NL line feed + String.fromCharCode 12 # NP Form feed + String.fromCharCode 13 # Carriage return + String.fromCharCode 32 # Space + 'sth' + ].join(''), trim: true, record_delimiter: '|', (err, records) -> + records.should.eql [['sth']] + next() it 'should ignore the whitespaces immediately preceding and following the delimiter', (next) -> data = [] diff --git a/test/samples.coffee b/test/samples.coffee new file mode 100644 index 0000000..7d2906b --- /dev/null +++ b/test/samples.coffee @@ -0,0 +1,19 @@ + +fs = require('fs').promises +util = require 'util' +path = require 'path' +{exec} = require 'child_process' +each = require 'each' + +it 'samples', -> + dir = path.resolve __dirname, '../samples' + samples = await fs.readdir dir + each samples.filter( (sample) -> /\.js/.test.sample) + .call (sample, callback) -> + exec "node #{path.resolve dir, sample}", (err) -> + callback err + .promise() + + + +