Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
catamphetamine committed Jun 11, 2022
1 parent 7d874fe commit 67f23dc
Show file tree
Hide file tree
Showing 19 changed files with 530 additions and 233 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
5.3.4 / 11.06.2022
==================

* Added an optional `reason?: string` property to with-schema parsing errors.

5.3.3 / 24.05.2022
==================

Expand Down
35 changes: 21 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ const schema = {
}

readXlsxFile(file, { schema }).then(({ rows, errors }) => {
// `errors` list items have shape: `{ row, column, error, value }`.
// `errors` list items have shape: `{ row, column, error, reason?, value?, type? }`.
errors.length === 0

rows === [{
Expand Down Expand Up @@ -241,27 +241,34 @@ const { rows, errors } = convertToJson(data, schema)
```js
import { parseExcelDate } from 'read-excel-file'

function ParseExcelError({ children: error }) {
// Get a human-readable value.
let value = error.value
if (error.type === Date) {
value = parseExcelDate(value).toString()
}
// Render error summary.
function ParseExcelError({ children }) {
const { type, value, error, reason, row, column } = children

// Error summary.
return (
<div>
<code>"{error.error}"</code>
<code>"{error}"</code>
{reason && ' '}
{reason && <code>("{reason}")</code>}
{' for value '}
<code>"{value}"</code>
<code>{stringifyValue(value)}</code>
{' in column '}
<code>"{error.column}"</code>
{error.type && ' of type '}
{error.type && <code>"{error.type.name}"</code>}
<code>"{column}"</code>
{type && type.name && ' of type '}
{type && type.name && <code>"{type.name}"</code>}
{' in row '}
<code>"{error.row}"</code>
<code>{row}</code>
</div>
)
}

// Turns a value into its display text:
// strings get wrapped in double quotes, anything else is passed through `String()`.
function stringifyValue(value) {
  return typeof value === 'string' ? `"${value}"` : String(value)
}
```
</details>

Expand Down
2 changes: 1 addition & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "read-excel-file",
"version": "5.3.3",
"version": "5.3.4",
"description": "Read small to medium `*.xlsx` files in a browser or Node.js. Parse to JSON with a strict schema.",
"module": "index.js",
"main": "index.cjs",
Expand Down
118 changes: 118 additions & 0 deletions source/read/isDateTimestamp.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// XLSX does have "d" type for dates, but it's not commonly used.
// Instead, it prefers using "n" type for storing dates as timestamps.
//
// Whether a numeric value is a plain number or a date timestamp can sometimes be
// detected by looking at the value's "format" and checking if it's a date-specific one.
// https://github.com/catamphetamine/read-excel-file/issues/3#issuecomment-395770777
//
// The list of generic numeric value "formats":
// https://xlsxwriter.readthedocs.io/format.html#format-set-num-format
//
/**
 * Tells whether a numeric cell value should be treated as a date timestamp,
 * judging by the "number format" of the cell's style.
 *
 * @param {*} value - The cell value. It is not inspected here: only the style is.
 * @param {string} [styleId] - Key of the cell's style in `styles`. When falsy, the result is `false`.
 * @param {object} styles - Styles indexed by style id. A style is expected to have
 *   a `numberFormat` object with an `id` and, optionally, a `template`.
 * @param {object} [options]
 * @param {string} [options.dateFormat] - A formatting template that is known to be a date one.
 * @param {boolean} [options.smartDateParser] - Pass `false` to turn off guessing
 *   whether a formatting template is a date one by looking at its tokens.
 * @returns {boolean} `true` if the style says it's a date, `false` otherwise.
 * @throws {Error} When `styleId` is passed but `styles` has no such style.
 */
export default function isDateTimestamp(value, styleId, styles, options = {}) {
  // No style means no "number format" to look at.
  if (!styleId) {
    return false
  }
  const style = styles[styleId]
  if (!style) {
    throw new Error(`Cell style not found: ${styleId}`)
  }
  const { id, template } = style.numberFormat
  if (
    // Whether it's a "number format" that's conventionally used for storing date timestamps.
    // Format ids are decimal numbers, hence the explicit radix.
    BUILT_IN_DATE_NUMBER_FORMAT_IDS.indexOf(parseInt(id, 10)) >= 0 ||
    // Whether it's a "number format" that uses a "formatting template"
    // that the developer is certain is a date formatting template.
    (options.dateFormat && template === options.dateFormat) ||
    // Whether the "smart formatting template" feature is not disabled
    // and it has detected that it's a date formatting template by looking at it.
    (options.smartDateParser !== false && template && isDateTemplate(template))
  ) {
    return true
  }
  return false
}

// Ids of the "built-in" number formats that are treated as date (or time) formats.
// https://hexdocs.pm/xlsxir/number_styles.html
const BUILT_IN_DATE_NUMBER_FORMAT_IDS = [14,15,16,17,18,19,20,21,22,27,30,36,45,46,47,50,57]

// On some date formats, there's a "[$-...]" prefix, such as "[$-414]" or "[$-404]".
// NOTE(review): this looks like Excel's locale specifier, the part after "$-"
// being a locale code. Confirm against the number format documentation.
// Whatever the code is, it doesn't tell whether the template is a date one,
// so any such prefix is matched, not just "[$-414]".
//
// https://stackoverflow.com/questions/4730152/what-indicates-an-office-open-xml-cell-contains-a-date-time-value
//
// Examples:
//
// * 27 (built-in format) "[$-404]e/m/d"
// * 164 (custom format) "[$-414]mmmm\ yyyy;@"
//
const DATE_FORMAT_WEIRD_PREFIX = /^\[\$-[^\]]+\]/

// On some date formats, there's an ";@" postfix.
// NOTE(review): presumably a trailing format section for text values
// ("@" being the text placeholder). Confirm against the number format documentation.
// Examples:
//
// * 164 (custom format) "m/d/yyyy;@"
// * 164 (custom format) "[$-414]mmmm\ yyyy;@"
//
const DATE_FORMAT_WEIRD_POSTFIX = /;@$/

// Guesses whether a "number format" template is a date (or time) one:
// it is when every token of the template is a known date/time token.
function isDateTemplate(template) {
  const normalized = template
    // Date format tokens could be in upper case or in lower case
    // (there seems to be no single standard), so compare in lower case.
    .toLowerCase()
    // Trim the "weird" prefix that is present on some date formats.
    .replace(DATE_FORMAT_WEIRD_PREFIX, '')
    // Trim the "weird" ";@" postfix that is present on some date formats.
    .replace(DATE_FORMAT_WEIRD_POSTFIX, '')

  // Tokens are the runs of "word" characters of the template.
  // A single unknown token (an empty one included) means "not a date template".
  return normalized
    .split(/\W+/)
    .every((token) => DATE_TEMPLATE_TOKENS.indexOf(token) >= 0)
}

// The tokens that a date (or time) formatting template could consist of.
//
// These tokens could be in upper case or in lower case.
// There seems to be no single standard, so using lower case.
const DATE_TEMPLATE_TOKENS = [
  // Seconds (min two digits). Example: "05".
  'ss',
  // Either minutes or a month (numeric), min two digits. Example: "05".
  // The same token is used for both. Weird.
  'mm',
  // Hours. Example: "1".
  'h',
  // Hours (min two digits). Example: "01".
  'hh',
  // "AM" part of "AM/PM". Lowercased just in case.
  'am',
  // "PM" part of "AM/PM". Lowercased just in case.
  'pm',
  // Day. Example: "1"
  'd',
  // Day (min two digits). Example: "01"
  'dd',
  // Month (numeric). Example: "1".
  'm',
  // Month (shortened month name). Example: "Jan".
  'mmm',
  // Month (full month name). Example: "January".
  'mmmm',
  // Two-digit year. Example: "20".
  'yy',
  // Full year. Example: "2020".
  'yyyy',

  // "e" is used in "built-in" XLSX formats:
  // * 27 '[$-404]e/m/d';
  // * 36 '[$-404]e/m/d';
  // * 50 '[$-404]e/m/d';
  // * 57 '[$-404]e/m/d';
  // NOTE(review): presumably some kind of a year token, judging by
  // its position in "e/m/d". Confirm against the number format documentation.
  'e'
]
5 changes: 5 additions & 0 deletions source/read/parseCell.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ import {
getCellInlineStringValue
} from '../xml/xlsx.js'

import {
getOuterXml
} from '../xml/dom.js'

// Example of a `<c/>`ell element:
//
// <c>
Expand Down Expand Up @@ -54,6 +58,7 @@ export default function parseCell(node, sheet, xml, values, styles, properties,
column: coords[1],
value: parseCellValue(value, type, {
getInlineStringValue: () => getCellInlineStringValue(sheet, node),
getInlineStringXml: () => getOuterXml(node),
getStyleId: () => node.getAttribute('s'),
styles,
values,
Expand Down
Loading

1 comment on commit 67f23dc

@renatocfrancisco
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bruh

Please sign in to comment.