Skip to content

Commit 67f23dc

Browse files
.
1 parent 7d874fe commit 67f23dc

19 files changed

+530
-233
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
5.3.4 / 11.06.2022
2+
==================
3+
4+
* Added an optional `reason?: string` property to the errors returned when parsing with a schema.
5+
16
5.3.3 / 24.05.2022
27
==================
38

README.md

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ const schema = {
157157
}
158158

159159
readXlsxFile(file, { schema }).then(({ rows, errors }) => {
160-
// `errors` list items have shape: `{ row, column, error, value }`.
160+
// `errors` list items have shape: `{ row, column, error, reason?, value?, type? }`.
161161
errors.length === 0
162162

163163
rows === [{
@@ -241,27 +241,34 @@ const { rows, errors } = convertToJson(data, schema)
241241
```js
242242
import { parseExcelDate } from 'read-excel-file'
243243

244-
function ParseExcelError({ children: error }) {
245-
// Get a human-readable value.
246-
let value = error.value
247-
if (error.type === Date) {
248-
value = parseExcelDate(value).toString()
249-
}
250-
// Render error summary.
244+
function ParseExcelError({ children }) {
245+
const { type, value, error, reason, row, column } = children
246+
247+
// Error summary.
251248
return (
252249
<div>
253-
<code>"{error.error}"</code>
250+
<code>"{error}"</code>
251+
{reason && ' '}
252+
{reason && <code>("{reason}")</code>}
254253
{' for value '}
255-
<code>"{value}"</code>
254+
<code>{stringifyValue(value)}</code>
256255
{' in column '}
257-
<code>"{error.column}"</code>
258-
{error.type && ' of type '}
259-
{error.type && <code>"{error.type.name}"</code>}
256+
<code>"{column}"</code>
257+
{type && type.name && ' of type '}
258+
{type && type.name && <code>"{type.name}"</code>}
260259
{' in row '}
261-
<code>"{error.row}"</code>
260+
<code>{row}</code>
262261
</div>
263262
)
264263
}
264+
265+
function stringifyValue(value) {
266+
// Wrap strings in quotes.
267+
if (typeof value === 'string') {
268+
return '"' + value + '"'
269+
}
270+
return String(value)
271+
}
265272
```
266273
</details>
267274

package-lock.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "read-excel-file",
3-
"version": "5.3.3",
3+
"version": "5.3.4",
44
"description": "Read small to medium `*.xlsx` files in a browser or Node.js. Parse to JSON with a strict schema.",
55
"module": "index.js",
66
"main": "index.cjs",

source/read/isDateTimestamp.js

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
// XLSX does have a "d" cell type for dates, but it's not commonly used.
// Instead, spreadsheet software prefers the "n" (numeric) type and stores dates as timestamps.
//
// Whether a numeric value is a plain number or a date timestamp can sometimes be
// detected by looking at the cell's number "format" and seeing if it's a date-specific one.
// https://github.com/catamphetamine/read-excel-file/issues/3#issuecomment-395770777
//
// The list of generic numeric value "formats":
// https://xlsxwriter.readthedocs.io/format.html#format-set-num-format

// IDs of the "built-in" number formats that are conventionally used for date timestamps.
// https://hexdocs.pm/xlsxir/number_styles.html
const BUILT_IN_DATE_NUMBER_FORMAT_IDS = new Set([
  14, 15, 16, 17, 18, 19, 20, 21, 22, 27, 30, 36, 45, 46, 47, 50, 57
])

// Some date formats start with a "[$-XXX]" prefix, where "XXX" is a hexadecimal
// locale identifier (for example, "[$-414]", "[$-404]" or "[$-409]").
// The prefix says nothing about whether the template is a date one, so it's trimmed.
// The pattern is applied to an already lowercased template, hence lowercase hex digits;
// the `i` flag is only there in case the constant is reused on a non-lowercased string.
//
// https://stackoverflow.com/questions/4730152/what-indicates-an-office-open-xml-cell-contains-a-date-time-value
//
// Examples:
//
// * 27 (built-in format) "[$-404]e/m/d"
// * 164 (custom format) "[$-414]mmmm\ yyyy;@"
//
const DATE_FORMAT_WEIRD_PREFIX = /^\[\$-[0-9a-f]+\]/i

// Some date formats end with a ";@" postfix.
// "@" is the text placeholder in number format codes, so this is presumably
// a trailing section describing how text values are displayed — TODO confirm.
// It doesn't affect date detection, so it's trimmed.
//
// Examples:
//
// * 164 (custom format) "m/d/yyyy;@"
// * 164 (custom format) "[$-414]mmmm\ yyyy;@"
//
const DATE_FORMAT_WEIRD_POSTFIX = /;@$/

// Tokens that are allowed in a date formatting template.
// These tokens could be in upper case or in lower case.
// There seems to be no single standard, so lower case is used
// and templates are lowercased before being compared against this list.
const DATE_TEMPLATE_TOKENS = new Set([
  // Seconds (min two digits). Example: "05".
  'ss',
  // Minutes (min two digits). Example: "05".
  // The same token also means "Month (numeric, min two digits)". Example: "01".
  'mm',
  // Hours. Example: "1".
  'h',
  // Hours (min two digits). Example: "01".
  'hh',
  // "AM" part of "AM/PM". Lowercased just in case.
  'am',
  // "PM" part of "AM/PM". Lowercased just in case.
  'pm',
  // Day. Example: "1"
  'd',
  // Day (min two digits). Example: "01"
  'dd',
  // Month (numeric). Example: "1".
  'm',
  // Month (shortened month name). Example: "Jan".
  'mmm',
  // Month (full month name). Example: "January".
  'mmmm',
  // Two-digit year. Example: "20".
  'yy',
  // Full year. Example: "2020".
  'yyyy',

  // "e" is used in "built-in" XLSX formats:
  // * 27 '[$-404]e/m/d';
  // * 36 '[$-404]e/m/d';
  // * 50 '[$-404]e/m/d';
  // * 57 '[$-404]e/m/d';
  // NOTE(review): presumably a locale-specific (era) year token — confirm.
  'e'
])

/**
 * Tells whether a numeric cell value should be interpreted as a date timestamp,
 * judging by the number format of the cell's style.
 *
 * @param {*} value - The cell value. Not inspected by this function.
 * @param {string} [styleId] - Cell style ID. When falsy, the result is `false`.
 * @param {object} styles - Cell styles, indexed by style ID.
 * @param {object} [options]
 * @param {string} [options.dateFormat] - A formatting template that is known to be a date one.
 * @param {boolean} [options.smartDateParser] - Pass `false` to disable detecting date templates by their tokens.
 * @returns {boolean}
 * @throws {Error} When `styleId` is passed but no such style exists in `styles`.
 */
export default function isDateTimestamp(value, styleId, styles, options = {}) {
  // No style means no number format to inspect.
  if (!styleId) {
    return false
  }

  const style = styles[styleId]
  if (!style) {
    throw new Error(`Cell style not found: ${styleId}`)
  }

  // A style that doesn't specify a number format can't be a date one.
  const { numberFormat } = style
  if (!numberFormat) {
    return false
  }

  // Whether it's a "number format" that's conventionally used for storing date timestamps.
  if (BUILT_IN_DATE_NUMBER_FORMAT_IDS.has(Number.parseInt(numberFormat.id, 10))) {
    return true
  }

  const { template } = numberFormat

  // Whether it's a "number format" that uses a "formatting template"
  // that the developer is certain is a date formatting template.
  if (options.dateFormat && template === options.dateFormat) {
    return true
  }

  // Whether the "smart formatting template" feature is not disabled
  // and it has detected that it's a date formatting template by looking at it.
  return Boolean(options.smartDateParser !== false && template && isDateTemplate(template))
}

// Tells whether a formatting template consists exclusively of date tokens.
function isDateTemplate(template) {
  const tokens = template
    // Date format tokens could be in upper case or in lower case.
    // There seems to be no single standard, so lowercase the template first.
    .toLowerCase()
    // Trim the locale prefix, if any. Example: "[$-414]".
    .replace(DATE_FORMAT_WEIRD_PREFIX, '')
    // Trim the ";@" postfix, if any.
    .replace(DATE_FORMAT_WEIRD_POSTFIX, '')
    // Everything that isn't a letter, a digit or an underscore is a separator.
    .split(/\W+/)

  // A single non-date token (including an empty one that appears when the template
  // starts or ends with a separator) disqualifies the template.
  return tokens.every((token) => DATE_TEMPLATE_TOKENS.has(token))
}

source/read/parseCell.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ import {
99
getCellInlineStringValue
1010
} from '../xml/xlsx.js'
1111

12+
import {
13+
getOuterXml
14+
} from '../xml/dom.js'
15+
1216
// Example of a `<c/>`ell element:
1317
//
1418
// <c>
@@ -54,6 +58,7 @@ export default function parseCell(node, sheet, xml, values, styles, properties,
5458
column: coords[1],
5559
value: parseCellValue(value, type, {
5660
getInlineStringValue: () => getCellInlineStringValue(sheet, node),
61+
getInlineStringXml: () => getOuterXml(node),
5762
getStyleId: () => node.getAttribute('s'),
5863
styles,
5964
values,

0 commit comments

Comments
 (0)