Skip to content

Commit

Permalink
version bump 0.11.13: codepage
Browse files Browse the repository at this point in the history
- binary CSV `codepage` read option (fixes SheetJS#907 h/t @popovserhii)
- BIFF2-5 `codepage` read option (fixes SheetJS#912 h/t @makcbrain)
- `xlsx` utility `--codepage` override option
- HTML support some common entities (fixes SheetJS#914 h/t @razvanioan)
  • Loading branch information
SheetJSDev committed Dec 9, 2017
1 parent eff7d15 commit c9cab80
Show file tree
Hide file tree
Showing 26 changed files with 187 additions and 81 deletions.
5 changes: 5 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,8 @@ bits/*.js text eol=lf
test.js text eol=lf
xlsx*.js text eol=lf
*.flow.js text eol=lf

docbits/* linguist-documentation
dist/* linguist-generated=true
xlsx.js linguist-generated=true
xlsxworker.js linguist-generated=true
1 change: 0 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ nexe: xlsx.exe ## Build nexe standalone executable
xlsx.exe: bin/xlsx.njs xlsx.js
tail -n+2 $< | sed 's#\.\./#./xlsx#g' > nexe.js
nexe -i nexe.js -o $@
head nexe.js
rm nexe.js

.PHONY: pkg
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1493,6 +1493,7 @@ The exported `read` and `readFile` functions accept an options argument:
| :---------- | ------: | :--------------------------------------------------- |
|`type` | | Input data encoding (see Input Type below) |
|`raw` | false | If true, plain text parsing will not parse values ** |
|`codepage` | 1252 | If specified, use code page when appropriate ** |
|`cellFormula`| true | Save formulae to the .f field |
|`cellHTML` | true | Parse rich text and save HTML to the `.h` field |
|`cellNF` | false | Save number format string to the `.z` field |
Expand Down Expand Up @@ -1526,6 +1527,8 @@ The exported `read` and `readFile` functions accept an options argument:
XLSM and XLSB store the VBA CFB object in `xl/vbaProject.bin`. BIFF8 XLS mixes
the VBA entries alongside the core Workbook entry, so the library generates a
new XLSB-compatible blob from the XLS CFB container.
- `codepage` is applied to BIFF2 - BIFF5 files without `CodePage` records and to
CSV files without BOM in `type:"binary"`. BIFF8 XLS always defaults to 1200.
- Currently only XOR encryption is supported. Unsupported error will be thrown
for files employing other encryption methods.
- WTF is mainly for development. By default, the parser will suppress read
Expand Down
7 changes: 7 additions & 0 deletions bin/xlsx.njs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ program
.option('-F, --field-sep <sep>', 'CSV field separator', ",")
.option('-R, --row-sep <sep>', 'CSV row separator', "\n")
.option('-n, --sheet-rows <num>', 'Number of rows to process (0=all rows)')
.option('--codepage <cp>', 'default to specified codepage when ambiguous')
.option('--req <module>', 'require module before processing')
.option('--sst', 'generate shared string table for XLS* formats')
.option('--compress', 'use compression when writing XLSX/M/B and ODS')
.option('--read', 'read but do not generate output')
Expand Down Expand Up @@ -91,6 +93,10 @@ if(!fs.existsSync(filename)) {
process.exit(2);
}

if(program.req) program.req.split(",").forEach(function(r) {
require((fs.existsSync(r) || fs.existsSync(r + '.js')) ? require('path').resolve(r) : r);
});

var opts = {}, wb/*:?Workbook*/;
if(program.listSheets) opts.bookSheets = true;
if(program.sheetRows) opts.sheetRows = program.sheetRows;
Expand Down Expand Up @@ -128,6 +134,7 @@ if(program.all) {
wopts.bookVBA = true;
}
if(program.sparse) opts.dense = false; else opts.dense = true;
if(program.codepage) opts.codepage = +program.codepage;

if(program.dev) {
opts.WTF = true;
Expand Down
2 changes: 1 addition & 1 deletion bits/01_version.js
Original file line number Diff line number Diff line change
@@ -1 +1 @@
XLSX.version = '0.11.12';
XLSX.version = '0.11.13';
14 changes: 11 additions & 3 deletions bits/22_xmlutils.js
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,17 @@ var matchtag = (function() {
};
})();

/* Reduce an HTML fragment to plain text: trim, collapse whitespace runs to one space, turn <br> tags into newlines, drop every other tag, and decode &nbsp; to a space */
function htmldecode(str/*:string*/)/*:string*/ {
	var collapsed = str.trim().replace(/\s+/g, " ");
	/* <br> must become a newline before the generic tag stripper removes it */
	var with_breaks = collapsed.replace(/<\s*[bB][rR]\s*\/?>/g,"\n");
	var no_tags = with_breaks.replace(/<[^>]*>/g,"");
	return no_tags.replace(/&nbsp;/g, " ");
}
/* Reduce an HTML fragment to plain text: trim, collapse whitespace runs to one space, turn <br> tags into newlines, drop every other tag, and decode a small set of named entities */
var htmldecode = (function() {
	/* entity name -> decoded text */
	var decoded = {
		nbsp: ' ', middot: '·',
		quot: '"', apos: "'", gt: '>', lt: '<', amp: '&'
	};
	/* one pass over the text: a '&' produced by decoding &amp; is never scanned again */
	var entity_re = /&(nbsp|middot|quot|apos|gt|lt|amp);/g;
	function decode_entity(match/*:string*/, name/*:string*/)/*:string*/ { return decoded[name]; }
	return function htmldecode(str/*:string*/)/*:string*/ {
		var text = str.trim().replace(/\s+/g, " ");
		/* <br> must become a newline before the generic tag stripper removes it */
		text = text.replace(/<\s*[bB][rR]\s*\/?>/g,"\n");
		text = text.replace(/<[^>]*>/g,"");
		return text.replace(entity_re, decode_entity);
	};
})();

var vtregex = (function(){ var vt_cache = {};
return function vt_regex(bt) {
Expand Down
1 change: 1 addition & 0 deletions bits/40_harb.js
Original file line number Diff line number Diff line change
Expand Up @@ -828,6 +828,7 @@ var PRN = (function() {
default: throw new Error("Unrecognized type " + opts.type);
}
if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str.slice(3));
else if((opts.type == 'binary' || opts.type == 'buffer') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(1252,str));
if(str.slice(0,19) == "socialcalc:version:") return ETH.to_sheet(opts.type == 'string' ? str : utf8read(str), opts);
return prn_to_sheet_str(str, opts);
}
Expand Down
6 changes: 4 additions & 2 deletions bits/76_xls.js
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
/* explicit override for some broken writers */
opts.codepage = 1200;
set_cp(1200);
var seen_codepage = false;
while(blob.l < blob.length - 1) {
var s = blob.l;
var RecordType = blob.read_shift(2);
Expand Down Expand Up @@ -247,8 +248,8 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
case 0x8000: val = 10000; break;
case 0x8001: val = 1252; break;
}
opts.codepage = val;
set_cp(val);
set_cp(opts.codepage = val);
seen_codepage = true;
break;
case 'RRTabId': opts.rrtabid = val; break;
case 'WinProtect': opts.winlocked = val; break;
Expand Down Expand Up @@ -346,6 +347,7 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
cell_valid = true;
out = ((options.dense ? [] : {})/*:any*/);

if(opts.biff < 8 && !seen_codepage) { seen_codepage = true; set_cp(opts.codepage = options.codepage || 1252); }
if(opts.biff < 5) {
if(cur_sheet === "") cur_sheet = "Sheet1";
range = {s:{r:0,c:0},e:{r:0,c:0}};
Expand Down
24 changes: 14 additions & 10 deletions demos/headless/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,30 @@ $ npm install -g phantomjs
$ phantomjs phantomjs.js
```

## wkhtmltopdf
## Chrome Automation

This was tested in wkhtmltopdf 0.12.4, installed using the official binaries:

```bash
$ wkhtmltopdf --javascript-delay 20000 http://oss.sheetjs.com/js-xlsx/tests/ test.pdf
```

## Puppeteer

This was tested in puppeteer 0.9.0 and Chromium revision 494755:
This was tested in puppeteer 0.9.0 (Chromium revision 494755) and `chromeless`:

```bash
$ npm install puppeteer
$ node puppeteer.js

$ npm install -g chromeless
$ node chromeless.js
```

Since the main process is node, the read and write features should be placed in
the webpage. The `dist` versions are suitable for web pages.


## wkhtmltopdf

This was tested in wkhtmltopdf 0.12.4, installed using the official binaries:

```bash
$ wkhtmltopdf --javascript-delay 20000 http://oss.sheetjs.com/js-xlsx/tests/ test.pdf
```

## SlimerJS

This was tested in SlimerJS 0.10.3 and FF 52.0, installed using `brew` on OSX:
Expand Down
9 changes: 9 additions & 0 deletions demos/headless/chromeless.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/* Headless Chrome smoke test: open the test page, wait, take a screenshot and print the value the screenshot call resolves to */
const { Chromeless } = require('chromeless');
/* TEST: page to open; TIME: value handed to .wait() before the screenshot (30 * 1000, presumably milliseconds) */
const TEST = 'http://localhost:8000', TIME = 30 * 1000;
(async() => {
	const browser = new Chromeless();
	try {
		const pth = await browser.goto(TEST).wait(TIME).screenshot();
		console.log(pth);
	} finally {
		/* always shut the session down, even when navigation or the screenshot fails */
		await browser.end();
	}
})().catch(e=>{
	console.error(e);
	/* report the failure to the caller instead of exiting with status 0 */
	process.exitCode = 1;
});

26 changes: 13 additions & 13 deletions dist/xlsx.core.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/xlsx.core.min.map

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions dist/xlsx.full.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/xlsx.full.min.map

Large diffs are not rendered by default.

23 changes: 17 additions & 6 deletions dist/xlsx.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 12 additions & 12 deletions dist/xlsx.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion dist/xlsx.min.map

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions docbits/80_parseopts.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ The exported `read` and `readFile` functions accept an options argument:
| :---------- | ------: | :--------------------------------------------------- |
|`type` | | Input data encoding (see Input Type below) |
|`raw` | false | If true, plain text parsing will not parse values ** |
|`codepage` | | If specified, use code page when appropriate ** |
|`cellFormula`| true | Save formulae to the .f field |
|`cellHTML` | true | Parse rich text and save HTML to the `.h` field |
|`cellNF` | false | Save number format string to the `.z` field |
Expand Down Expand Up @@ -39,6 +40,8 @@ The exported `read` and `readFile` functions accept an options argument:
XLSM and XLSB store the VBA CFB object in `xl/vbaProject.bin`. BIFF8 XLS mixes
the VBA entries alongside the core Workbook entry, so the library generates a
new XLSB-compatible blob from the XLS CFB container.
- `codepage` is applied to BIFF2 - BIFF5 files without `CodePage` records and to
CSV files without BOM in `type:"binary"`. BIFF8 XLS always defaults to 1200.
- Currently only XOR encryption is supported. Unsupported error will be thrown
for files employing other encryption methods.
- WTF is mainly for development. By default, the parser will suppress read
Expand Down
3 changes: 3 additions & 0 deletions formats.dot
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ digraph G {
wk3 [label="WK3/4"];
wqb [label="WQ*\nWB*"];
qpw [label="QPW"];
eth [label="ETH"];
}

subgraph WORKBOOK {
Expand Down Expand Up @@ -75,5 +76,7 @@ digraph G {
csf -> dbf
html -> csf
csf -> html
csf -> eth
eth -> csf
}
}
Binary file modified formats.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions misc/docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1364,6 +1364,7 @@ The exported `read` and `readFile` functions accept an options argument:
| :---------- | ------: | :--------------------------------------------------- |
|`type` | | Input data encoding (see Input Type below) |
|`raw` | false | If true, plain text parsing will not parse values ** |
|`codepage` | 1252 | If specified, use code page when appropriate ** |
|`cellFormula`| true | Save formulae to the .f field |
|`cellHTML` | true | Parse rich text and save HTML to the `.h` field |
|`cellNF` | false | Save number format string to the `.z` field |
Expand Down Expand Up @@ -1397,6 +1398,8 @@ The exported `read` and `readFile` functions accept an options argument:
XLSM and XLSB store the VBA CFB object in `xl/vbaProject.bin`. BIFF8 XLS mixes
the VBA entries alongside the core Workbook entry, so the library generates a
new XLSB-compatible blob from the XLS CFB container.
- `codepage` is applied to BIFF2 - BIFF5 files without `CodePage` records and to
CSV files without BOM in `type:"binary"`. BIFF8 XLS always defaults to 1200.
- Currently only XOR encryption is supported. Unsupported error will be thrown
for files employing other encryption methods.
- WTF is mainly for development. By default, the parser will suppress read
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "xlsx",
"version": "0.11.12",
"version": "0.11.13",
"author": "sheetjs",
"description": "SheetJS Spreadsheet data parser and writer",
"keywords": [
Expand Down
17 changes: 15 additions & 2 deletions test.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ if(typeof process != 'undefined' && ((process||{}).env)) {
if(process.env.FMTS) ex=process.env.FMTS.split(":").map(function(x){return x[0]==="."?x:"."+x;});
}
var exp = ex.map(function(x){ return x + ".pending"; });
/* true when x ends with a 5- or 4-character entry of `ex`, or with the corresponding entry of `exp` (8 characters longer) */
function test_file(x){
	return [5, 4].some(function(len) {
		return ex.indexOf(x.substr(-len)) >= 0 || exp.indexOf(x.substr(-(len + 8))) >= 0;
	});
}
/* true when x ends with a 5- or 4-character entry of `ex`, or with the corresponding entry of `exp` (8 characters longer) */
function test_file(x){
	return [5, 4].some(function(len) {
		return ex.indexOf(x.slice(-len)) >= 0 || exp.indexOf(x.slice(-(len + 8))) >= 0;
	});
}

var files = browser ? [] : (fs.existsSync('tests.lst') ? fs.readFileSync('tests.lst', 'utf-8').split("\n").map(function(x) { return x.trim(); }) : fs.readdirSync('test_files')).filter(test_file);
var fileA = browser ? [] : (fs.existsSync('tests/testA.lst') ? fs.readFileSync('tests/testA.lst', 'utf-8').split("\n").map(function(x) { return x.trim(); }) : []).filter(test_file);
Expand Down Expand Up @@ -1686,7 +1686,7 @@ var html_bstr = make_html_str(1), html_str = make_html_str(0);
var csv_bstr = make_csv_str(1), csv_str = make_csv_str(0);


describe('csv', function() {
describe('CSV', function() {
describe('input', function(){
var b = "1,2,3,\nTRUE,FALSE,,sheetjs\nfoo,bar,2/19/14,0.3\n,,,\nbaz,,qux,\n";
it('should generate date numbers by default', function() {
Expand Down Expand Up @@ -1743,6 +1743,13 @@ describe('csv', function() {
assert.equal(get_cell(sheet, "C1").t, 's');
assert.equal(get_cell(sheet, "C1").v, '100');
});
if(!browser || typeof cptable !== 'undefined') it('should honor codepage for binary strings', function() {
var data = "abc,def\nghi,j\xD3l";
[[1251, 'У'],[1252, 'Ó'], [1253, 'Σ'], [1254, 'Ó'], [1255, '׃'], [1256, 'س'], [10000, '”']].forEach(function(m) {
var ws = X.read(data, {type:"binary", codepage:m[0]}).Sheets.Sheet1;
assert.equal(get_cell(ws, "B2").v, "j" + m[1] + "l");
});
});
});
describe('output', function(){
var data, ws;
Expand Down Expand Up @@ -1868,6 +1875,12 @@ describe('HTML', function() {
assert.equal(get_cell(wb.Sheets.Sheet1, "A1").v, "foo\nbar");
});
});
if(domtest) it('should handle entities', function() {
var html = "<table><tr><td>A&amp;B</td><td>A&middot;B</td></tr></table>";
var ws = X.utils.table_to_sheet(get_dom_element(html));
assert.equal(get_cell(ws, "A1").v, "A&B");
assert.equal(get_cell(ws, "B1").v, "A·B");
});
});

describe('js -> file -> js', function() {
Expand Down
19 changes: 16 additions & 3 deletions tests/core.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ if(typeof process != 'undefined' && ((process||{}).env)) {
if(process.env.FMTS) ex=process.env.FMTS.split(":").map(function(x){return x[0]==="."?x:"."+x;});
}
var exp = ex.map(function(x){ return x + ".pending"; });
/* true when x ends with a 5- or 4-character entry of `ex`, or with the corresponding entry of `exp` (8 characters longer) */
function test_file(x){
	return [5, 4].some(function(len) {
		return ex.indexOf(x.substr(-len)) >= 0 || exp.indexOf(x.substr(-(len + 8))) >= 0;
	});
}
/* true when x ends with a 5- or 4-character entry of `ex`, or with the corresponding entry of `exp` (8 characters longer) */
function test_file(x){
	return [5, 4].some(function(len) {
		return ex.indexOf(x.slice(-len)) >= 0 || exp.indexOf(x.slice(-(len + 8))) >= 0;
	});
}

var files = browser ? [] : (fs.existsSync('tests.lst') ? fs.readFileSync('tests.lst', 'utf-8').split("\n").map(function(x) { return x.trim(); }) : fs.readdirSync('test_files')).filter(test_file);
var fileA = browser ? [] : (fs.existsSync('tests/testA.lst') ? fs.readFileSync('tests/testA.lst', 'utf-8').split("\n").map(function(x) { return x.trim(); }) : []).filter(test_file);
Expand Down Expand Up @@ -623,7 +623,7 @@ describe('output formats', function() {
["csv", true, true],
["txt", true, true],
["sylk", false, true],
["eth", true, true],
["eth", false, true],
["html", true, true],
["dif", false, true],
["dbf", false, false],
Expand Down Expand Up @@ -1686,7 +1686,7 @@ var html_bstr = make_html_str(1), html_str = make_html_str(0);
var csv_bstr = make_csv_str(1), csv_str = make_csv_str(0);


describe('csv', function() {
describe('CSV', function() {
describe('input', function(){
var b = "1,2,3,\nTRUE,FALSE,,sheetjs\nfoo,bar,2/19/14,0.3\n,,,\nbaz,,qux,\n";
it('should generate date numbers by default', function() {
Expand Down Expand Up @@ -1743,6 +1743,13 @@ describe('csv', function() {
assert.equal(get_cell(sheet, "C1").t, 's');
assert.equal(get_cell(sheet, "C1").v, '100');
});
if(!browser || typeof cptable !== 'undefined') it('should honor codepage for binary strings', function() {
var data = "abc,def\nghi,j\xD3l";
[[1251, 'У'],[1252, 'Ó'], [1253, 'Σ'], [1254, 'Ó'], [1255, '׃'], [1256, 'س'], [10000, '”']].forEach(function(m) {
var ws = X.read(data, {type:"binary", codepage:m[0]}).Sheets.Sheet1;
assert.equal(get_cell(ws, "B2").v, "j" + m[1] + "l");
});
});
});
describe('output', function(){
var data, ws;
Expand Down Expand Up @@ -1868,6 +1875,12 @@ describe('HTML', function() {
assert.equal(get_cell(wb.Sheets.Sheet1, "A1").v, "foo\nbar");
});
});
if(domtest) it('should handle entities', function() {
var html = "<table><tr><td>A&amp;B</td><td>A&middot;B</td></tr></table>";
var ws = X.utils.table_to_sheet(get_dom_element(html));
assert.equal(get_cell(ws, "A1").v, "A&B");
assert.equal(get_cell(ws, "B1").v, "A·B");
});
});

describe('js -> file -> js', function() {
Expand Down
Loading

0 comments on commit c9cab80

Please sign in to comment.