Skip to content
This repository was archived by the owner on Jun 28, 2021. It is now read-only.

Commit 620125e

Browse files
jinliming2 and wdavidw
authored and committed
fix: Detecting BOM when data is not enough
/close #246
1 parent c28279e commit 620125e

File tree

4 files changed

+78
-21
lines changed

4 files changed

+78
-21
lines changed

lib/es5/index.js

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ function _iterableToArray(iter) { if (Symbol.iterator in Object(iter) || Object.
1818

1919
/**
 * Babel spread helper: when `arr` is a genuine array, returns a fresh array of
 * the same length whose every index has been explicitly assigned from `arr`
 * (so holes in the input become own `undefined` entries in the copy).
 * For anything that is not an array the function returns `undefined`.
 */
function _arrayWithoutHoles(arr) {
  if (!Array.isArray(arr)) {
    return undefined;
  }

  var dense = new Array(arr.length);
  var idx = 0;

  while (idx < arr.length) {
    dense[idx] = arr[idx];
    idx += 1;
  }

  return dense;
}
2020

21-
/**
 * Babel object-spread helper (earlier form): copies, in argument order, the
 * enumerable own string keys and enumerable own symbol keys of every argument
 * after `target` onto `target` via `_defineProperty`, then returns `target`.
 * `null` / `undefined` sources are treated as empty objects.
 */
function _objectSpread(target) {
  // Copy one source's enumerable own properties (strings first, then symbols).
  function copyFrom(origin) {
    var names = Object.keys(origin);

    if (typeof Object.getOwnPropertySymbols === 'function') {
      var visibleSymbols = Object.getOwnPropertySymbols(origin).filter(function (candidate) {
        return Object.getOwnPropertyDescriptor(origin, candidate).enumerable;
      });
      names = names.concat(visibleSymbols);
    }

    names.forEach(function (name) {
      _defineProperty(target, name, origin[name]);
    });
  }

  for (var pos = 1; pos < arguments.length; pos++) {
    copyFrom(arguments[pos] != null ? arguments[pos] : {});
  }

  return target;
}
21+
/**
 * Lists the own keys of `object`: its enumerable string keys followed by its
 * own symbol keys. When `enumerableOnly` is truthy, non-enumerable symbols are
 * dropped; otherwise every own symbol is included.
 */
function ownKeys(object, enumerableOnly) {
  var collected = Object.keys(object);

  // Environments without symbol support only ever report string keys.
  if (!Object.getOwnPropertySymbols) {
    return collected;
  }

  var syms = Object.getOwnPropertySymbols(object);

  if (enumerableOnly) {
    syms = syms.filter(function (candidate) {
      var descriptor = Object.getOwnPropertyDescriptor(object, candidate);
      return descriptor.enumerable;
    });
  }

  collected.push.apply(collected, syms);
  return collected;
}
22+
23+
/**
 * Babel object-spread helper: merges every argument after `target` into
 * `target` and returns `target`. `null` / `undefined` sources are treated as
 * empty objects. Sources at odd argument positions have their enumerable own
 * values assigned through `_defineProperty`; sources at even positions have
 * their own property descriptors copied instead.
 */
function _objectSpread(target) {
  // Odd positions: copy enumerable own values (string and symbol keys).
  function assignValues(origin) {
    ownKeys(origin, true).forEach(function (name) {
      _defineProperty(target, name, origin[name]);
    });
  }

  // Even positions: copy descriptors, preferring the bulk API when present.
  function copyDescriptors(origin) {
    if (Object.getOwnPropertyDescriptors) {
      Object.defineProperties(target, Object.getOwnPropertyDescriptors(origin));
      return;
    }

    ownKeys(origin).forEach(function (name) {
      Object.defineProperty(target, name, Object.getOwnPropertyDescriptor(origin, name));
    });
  }

  for (var pos = 1; pos < arguments.length; pos++) {
    var origin = arguments[pos] != null ? arguments[pos] : {};

    if (pos % 2) {
      assignValues(origin);
    } else {
      copyDescriptors(origin);
    }
  }

  return target;
}
2224

2325
/**
 * Sets `obj[key]` to `value` and returns `obj`. When `key` is already
 * reachable on `obj` (own or inherited), the property is (re)defined directly
 * on `obj` as an enumerable, configurable, writable data property; otherwise
 * a plain assignment is used.
 */
function _defineProperty(obj, key, value) {
  if (!(key in obj)) {
    obj[key] = value;
    return obj;
  }

  var descriptor = {
    value: value,
    enumerable: true,
    configurable: true,
    writable: true
  };
  Object.defineProperty(obj, key, descriptor);
  return obj;
}
2426

@@ -69,7 +71,7 @@ function (_Transform) {
6971

7072
_this = _possibleConstructorReturn(this, _getPrototypeOf(Parser).call(this, _objectSpread({}, {
7173
readableObjectMode: true
72-
}, opts)));
74+
}, {}, opts)));
7375
var options = {}; // Merge with user options
7476

7577
for (var opt in opts) {
@@ -396,6 +398,7 @@ function (_Transform) {
396398
};
397399
_this.options = options;
398400
_this.state = {
401+
bomSkipped: false,
399402
castField: fnCastField,
400403
commenting: false,
401404
enabled: options.from_line === 1,
@@ -472,6 +475,7 @@ function (_Transform) {
472475
to_line = _this$options.to_line;
473476
var record_delimiter = this.options.record_delimiter;
474477
var _this$state = this.state,
478+
bomSkipped = _this$state.bomSkipped,
475479
previousBuf = _this$state.previousBuf,
476480
rawBuffer = _this$state.rawBuffer,
477481
escapeIsQuote = _this$state.escapeIsQuote,
@@ -484,17 +488,34 @@ function (_Transform) {
484488
this.push(null);
485489
return;
486490
} else {
487-
// Handle UTF BOM
488-
if (bom === true && bom_utf8.compare(nextBuf, 0, 3) === 0) {
489-
buf = nextBuf.slice(3);
490-
} else {
491-
buf = nextBuf;
492-
}
491+
buf = nextBuf;
493492
}
494493
} else if (previousBuf !== undefined && nextBuf === undefined) {
495494
buf = previousBuf;
496495
} else {
497496
buf = Buffer.concat([previousBuf, nextBuf]);
497+
} // Handle UTF BOM
498+
499+
500+
if (bomSkipped === false) {
501+
if (bom === false) {
502+
this.state.bomSkipped = true;
503+
} else if (buf.length < 3) {
504+
// Not enough data
505+
if (end === false) {
506+
// Wait for more data
507+
this.state.previousBuf = buf;
508+
return;
509+
} // skip BOM detect because data length < 3
510+
511+
} else {
512+
if (bom_utf8.compare(buf, 0, 3) === 0) {
513+
// Skip BOM
514+
buf = buf.slice(3);
515+
}
516+
517+
this.state.bomSkipped = true;
518+
}
498519
}
499520

500521
var bufLen = buf.length;

lib/index.js

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,7 @@ class Parser extends Transform {
310310
}
311311
this.options = options
312312
this.state = {
313+
bomSkipped: false,
313314
castField: fnCastField,
314315
commenting: false,
315316
enabled: options.from_line === 1,
@@ -355,26 +356,41 @@ class Parser extends Transform {
355356
__parse(nextBuf, end){
356357
const {bom, comment, escape, from, from_line, info, ltrim, max_record_size, quote, raw, relax, rtrim, skip_empty_lines, to, to_line} = this.options
357358
let {record_delimiter} = this.options
358-
const {previousBuf, rawBuffer, escapeIsQuote, trimChars} = this.state
359+
const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote, trimChars} = this.state
359360
let buf
360361
if(previousBuf === undefined){
361362
if(nextBuf === undefined){
362363
// Handle empty string
363364
this.push(null)
364365
return
365366
}else{
366-
// Handle UTF BOM
367-
if(bom === true && bom_utf8.compare(nextBuf, 0, 3) === 0){
368-
buf = nextBuf.slice(3)
369-
}else{
370-
buf = nextBuf
371-
}
367+
buf = nextBuf
372368
}
373369
}else if(previousBuf !== undefined && nextBuf === undefined){
374370
buf = previousBuf
375371
}else{
376372
buf = Buffer.concat([previousBuf, nextBuf])
377373
}
374+
// Handle UTF BOM
375+
if(bomSkipped === false){
376+
if(bom === false){
377+
this.state.bomSkipped = true
378+
}else if(buf.length < 3){
379+
// Not enough data
380+
if(end === false){
381+
// Wait for more data
382+
this.state.previousBuf = buf
383+
return
384+
}
385+
// skip BOM detect because data length < 3
386+
}else{
387+
if(bom_utf8.compare(buf, 0, 3) === 0){
388+
// Skip BOM
389+
buf = buf.slice(3)
390+
}
391+
this.state.bomSkipped = true
392+
}
393+
}
378394
const bufLen = buf.length
379395
let pos
380396
for(pos = 0; pos < bufLen; pos++){
@@ -529,7 +545,7 @@ class Parser extends Transform {
529545
if(err !== undefined) return err
530546
}
531547
}
532-
548+
533549
const lappend = ltrim === false || this.state.quoting === true || this.state.field.length !== 0 || !this.__isCharTrimable(chr)
534550
// rtrim in non quoting is handle in __onField
535551
const rappend = rtrim === false || this.state.wasQuoting === false
@@ -763,7 +779,7 @@ class Parser extends Transform {
763779
const numOfCharLeft = bufLen - i - 1
764780
const requiredLength = Math.max(
765781
// Skip if the remaining buffer smaller than comment
766-
comment ? comment.length : 0,
782+
comment ? comment.length : 0,
767783
// Skip if the remaining buffer smaller than row delimiter
768784
recordDelimiterMaxLength,
769785
// Skip if the remaining buffer can be row delimiter following the closing quote

package.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
"@babel/preset-env": "^7.4.3",
5454
"@types/mocha": "^5.2.6",
5555
"@types/node": "^11.13.0",
56-
"@types/should": "^13.0.0",
5756
"coffeescript": "^2.4.0",
5857
"csv-generate": "^3.2.0",
5958
"csv-spectrum": "^1.0.0",

test/option.bom.coffee

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
parse = require '../lib'
33

44
describe 'Option `bom`', ->
5-
5+
66
it 'preserve bom if not defined', (next) ->
77
parser = parse (err, data) ->
88
data.should.eql [
@@ -24,7 +24,7 @@ describe 'Option `bom`', ->
2424
parser.write Buffer.from "\ufeffa,b,c\n"
2525
parser.write Buffer.from 'd,e,f'
2626
parser.end()
27-
27+
2828
it 'throw parsing error if quote follow bom', (next) ->
2929
parser = parse (err, data) ->
3030
err.message.should.eql 'Invalid opening quote at line 1'
@@ -55,4 +55,25 @@ describe 'Option `bom`', ->
5555
parser.write Buffer.from 'd,e,f'
5656
parser.end()
5757

58-
58+
it 'handle BOM even if no enough data in the first package', (next) ->
59+
parser = parse bom: true, (err, data) ->
60+
data.should.eql [
61+
['a', 'b', 'c']
62+
['d', 'e', 'f']
63+
]
64+
next()
65+
parser.write Buffer.from [239]
66+
parser.write Buffer.from [187]
67+
parser.write Buffer.from [191]
68+
parser.write Buffer.from "a,b,c\n"
69+
parser.write Buffer.from "d,e,f"
70+
parser.end()
71+
72+
it 'preserve data if no enough data to detect BOM', (next) ->
73+
parser = parse bom: true, (err, data) ->
74+
data.should.eql [
75+
['\ufffd']
76+
]
77+
next()
78+
parser.write Buffer.from [239, 187]
79+
parser.end()

0 commit comments

Comments
 (0)