From 13263514ef6ec02000cf2da39ba6aa2ff92f00ae Mon Sep 17 00:00:00 2001 From: David Worms Date: Tue, 14 May 2024 00:39:30 +0200 Subject: [PATCH] fix(csv-parse): skip event not raised with bom (fix #411) --- demo/issues-esm/lib/411.csv | 2 +- demo/issues-esm/lib/411.js | 59 ++++++++----------- packages/csv-parse/lib/index.js | 5 +- .../option.skip_records_with_error.coffee | 27 ++++++++- 4 files changed, 52 insertions(+), 41 deletions(-) diff --git a/demo/issues-esm/lib/411.csv b/demo/issues-esm/lib/411.csv index cab0a2a35..365872315 100644 --- a/demo/issues-esm/lib/411.csv +++ b/demo/issues-esm/lib/411.csv @@ -1,4 +1,4 @@ -id,first_name,last_name,email,modified_at +id,first_name,last_name,email,modified_at 1,Ring,Grinyov,rgrinyov0@weebly.com,2022-02-14 2,Kylie,Lauderdale,klauderdale1@wsj.com,2022-02-14, 3,Cammi,Bendix,cbendix2@tuttocitta.it,2022-02-14 diff --git a/demo/issues-esm/lib/411.js b/demo/issues-esm/lib/411.js index d57a83a13..957335480 100644 --- a/demo/issues-esm/lib/411.js +++ b/demo/issues-esm/lib/411.js @@ -1,38 +1,27 @@ -import path from 'path'; -import { pipeline } from 'stream/promises'; -import { parse as parseCSV } from 'csv-parse'; -import { Writable } from 'stream'; -import { createReadStream } from 'fs'; +import assert from 'node:assert'; +import { createReadStream } from 'node:fs'; +import { Writable } from 'node:stream' +import { finished } from 'node:stream/promises'; import desm from "desm"; -const __dirname = desm(import.meta.url); - -async function testRecordsSkip() { - const errors = []; - const records = []; - - const sink = new Writable({ - objectMode: true, - write: (_, __, callback) => { - records.push(_); - callback(); - }, - }); +import { parse } from 'csv-parse'; - const csvSource = createReadStream(path.join(__dirname, '411.csv')); - const parser = parseCSV({ - skip_records_with_error: true, - bom: true, - }); - parser.on('skip', function (err) { - errors.push(err); - }); - - await pipeline(csvSource, parser, sink); - - console.log({ - records, - errors, - }); -} +const __dirname = desm(import.meta.url); +const errors = [] -testRecordsSkip().catch(console.error); +const parser = parse({ + bom: true, + skipRecordsWithError: true, +}); +// Create a stream and consume its source +const sink = new Writable ({objectMode: true, write: (_, __, callback) => callback()}) +const outStream = createReadStream(`${__dirname}/411.csv`).pipe(parser).pipe(sink); +// Catch records with errors +parser.on('skip', (e) => { + errors.push(e); +}); +// Wait for stream to be consumed +await finished(outStream); +// Catch error from skip event +assert.deepStrictEqual(errors.map(e => e.message), [ + 'Invalid Record Length: expect 5, got 6 on line 3' +]) diff --git a/packages/csv-parse/lib/index.js b/packages/csv-parse/lib/index.js index 3ad632d97..eb03696ee 100644 --- a/packages/csv-parse/lib/index.js +++ b/packages/csv-parse/lib/index.js @@ -14,10 +14,9 @@ import {CsvError} from './api/CsvError.js'; class Parser extends Transform { constructor(opts = {}){ super({...{readableObjectMode: true}, ...opts, encoding: null}); - this.api = transform(opts); - this.api.options.on_skip = (err, chunk) => { + this.api = transform({on_skip: (err, chunk) => { this.emit('skip', err, chunk); - }; + }, ...opts}); // Backward compatibility this.state = this.api.state; this.options = this.api.options; diff --git a/packages/csv-parse/test/option.skip_records_with_error.coffee b/packages/csv-parse/test/option.skip_records_with_error.coffee index 85ccfd33c..0599f79d2 100644 --- a/packages/csv-parse/test/option.skip_records_with_error.coffee +++ b/packages/csv-parse/test/option.skip_records_with_error.coffee @@ -106,8 +106,7 @@ describe 'Option `skip_records_with_error`', -> 7,8,9,y ''' parser.end() - - + it 'handle "CSV_RECORD_INCONSISTENT_FIELDS_LENGTH"', (next) -> errors = 0 parser = parse skip_records_with_error: true, (err, records) -> @@ -130,6 +129,30 @@ describe 'Option `skip_records_with_error`', -> ''' parser.end() + describe 'with `bom` option', -> + + it 'handle "CSV_RECORD_INCONSISTENT_FIELDS_LENGTH" with bom (fix #411)', (next) -> + errors = 0 + parser = parse bom: true, skip_records_with_error: true, (err, records) -> + records.should.eql [ + ['a', 'b', 'c', 'd'] + ['e', 'f', 'g', 'h'] + ] unless err + errors.should.eql 1 + next err + parser.on 'skip', (err) -> + assert_error err, + message: 'Invalid Record Length: expect 4, got 3 on line 2' + code: 'CSV_RECORD_INCONSISTENT_FIELDS_LENGTH' + record: ['1', '2', '3'] + errors++ + parser.write ''' + \ufeffa,b,c,d + 1,2,3 + e,f,g,h + ''' + parser.end() + describe 'with `raw` option', -> it 'print raw record', (next) ->