Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
catamphetamine committed Feb 1, 2023
1 parent ff591a4 commit 863ea02
Show file tree
Hide file tree
Showing 13 changed files with 169 additions and 130 deletions.
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ readXlsxFile(file, { schema }).then(({ rows, errors }) => {
})
```

#### Tips and Features

<!-- If no `type` is specified then the cell value is returned "as is": as a string, number, date or boolean. -->

<!-- There are also some additional exported `type`s available: -->
Expand Down Expand Up @@ -280,19 +282,19 @@ readXlsxFile(file, {
</details>

<details>
<summary>How to fix spreadsheet data before <code>schema</code> parsing. For example, <strong>how to ignore empty rows</strong>.</summary>
<summary>How to fix spreadsheet data before <code>schema</code> parsing. For example, <strong>how to ignore irrelevant rows</strong>.</summary>

#####

Sometimes, a spreadsheet doesn't exactly have the structure required by this library's `schema` parsing feature: for example, it may be missing a header row, or contain some purely presentational / empty / "garbage" rows that should be removed. To fix that, one could pass an optional `transformData(data)` function that would modify the spreadsheet contents as required.
Sometimes, a spreadsheet doesn't exactly have the structure required by this library's `schema` parsing feature: for example, it may be missing a header row, or contain some purely presentational / irrelevant / "garbage" rows that should be removed. To fix that, one could pass an optional `transformData(data)` function that would modify the spreadsheet contents as required.

```js
readXlsxFile(file, {
schema,
transformData(data) {
// Add a missing header row.
return [['ID', 'NAME', ...]].concat(data)
// Remove empty rows.
// Remove irrelevant rows.
return data.filter(row => row.filter(column => column !== null).length > 0)
}
})
Expand Down
2 changes: 1 addition & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "read-excel-file",
"version": "5.5.3",
"version": "5.6.0",
"description": "Read small to medium `*.xlsx` files in a browser or Node.js. Parse to JSON with a strict schema.",
"module": "index.js",
"main": "index.cjs",
Expand Down
4 changes: 2 additions & 2 deletions source/read/readXlsxFileNode.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ describe('readXlsxFileNode', () => {
'IS FREE': {
prop: 'isFree',
type: Boolean
// Excel stored booleans as numbers:
// Excel stores booleans as numbers:
// `1` is `true` and `0` is `false`.
// Such numbers are parsed to booleans.
// Such numbers are parsed into booleans.
},
'COST': {
prop: 'cost',
Expand Down
134 changes: 14 additions & 120 deletions source/read/schema/convertToJson.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import parseDate from '../parseDate.js'

import Integer, { isInteger } from '../../types/Integer.js'
import URL, { isURL } from '../../types/URL.js'
import Email, { isEmail } from '../../types/Email.js'
import NumberType from '../../types/Number.js'
import StringType from '../../types/String.js'
import BooleanType from '../../types/Boolean.js'
import DateType from '../../types/Date.js'

const DEFAULT_OPTIONS = {
isColumnOriented: false
Expand Down Expand Up @@ -203,7 +202,11 @@ function parseCustomValue(value, parse) {
}
return { value }
} catch (error) {
return { error: error.message }
const result = { error: error.message }
if (error.reason) {
result.reason = error.reason;
}
return result
}
}

Expand All @@ -216,131 +219,22 @@ function parseCustomValue(value, parse) {
function parseValueOfType(value, type, options) {
switch (type) {
case String:
if (typeof value === 'string') {
return { value }
}
// Excel tends to perform a forced automatic conversion of string-type values
// to number-type ones when the user has input them. Otherwise, users wouldn't
// be able to perform formula calculations on those cell values because users
// won't bother manually choosing a "numeric" cell type for each cell, and
// even if they did, choosing a "numeric" cell type every time wouldn't be an
// acceptable "user experience".
//
// So, if a cell value is supposed to be a string and Excel has automatically
// converted it to a number, perform a backwards conversion.
//
if (typeof value === 'number') {
if (isNaN(value)) {
return { error: 'invalid', reason: 'invalid_number' }
}
// The global `isFinite()` function filters out:
// * NaN
// * -Infinity
// * Infinity
//
// All other values pass (including non-numbers).
//
if (!isFinite(value)) {
return { error: 'invalid', reason: 'out_of_bounds' }
}
return { value: String(value) }
}
return { error: 'invalid', reason: 'not_a_string' }
return parseCustomValue(value, StringType)

case Number:
case Integer:
// An XLSX file editing software might not always correctly
// detect numeric values in string-type cells. Users won't bother
// manually selecting a cell type, so the editing software has to guess
// based on the user's input. One can assume that such auto-detection
// might not always work.
//
// So, if a cell is supposed to be a numeric one, convert a string value to a number.
//
if (typeof value === 'string') {
const stringifiedValue = value
value = Number(value)
if (String(value) !== stringifiedValue) {
return { error: 'invalid', reason: 'not_a_number' }
}
}
if (typeof value !== 'number') {
return { error: 'invalid', reason: 'not_a_number' }
}
if (isNaN(value)) {
return { error: 'invalid', reason: 'invalid_number' }
}
// At this point, `value` can only be a number.
//
// The global `isFinite()` function filters out:
// * NaN
// * -Infinity
// * Infinity
//
// All other values pass (including non-numbers).
//
if (!isFinite(value)) {
return { error: 'invalid', reason: 'out_of_bounds' }
}
if (type === Integer && !isInteger(value)) {
return { error: 'invalid', reason: 'not_an_integer' }
}
return { value }

case URL:
if (typeof value === 'string') {
if (isURL(value)) {
return { value }
}
return { error: 'invalid', reason: 'not_a_url' }
}
return { error: 'invalid', reason: 'not_a_string' }

case Email:
if (typeof value === 'string') {
if (isEmail(value)) {
return { value }
}
return { error: 'invalid', reason: 'not_an_email' }
}
return { error: 'invalid', reason: 'not_a_string' }
return parseCustomValue(value, NumberType)

case Date:
// XLSX has no specific format for dates.
// Sometimes a date can be heuristically detected.
// https://github.com/catamphetamine/read-excel-file/issues/3#issuecomment-395770777
if (value instanceof Date) {
if (isNaN(value.valueOf())) {
return { error: 'invalid', reason: 'out_of_bounds' }
}
return { value }
}
if (typeof value === 'number') {
if (isNaN(value)) {
return { error: 'invalid', reason: 'invalid_number' }
}
if (!isFinite(value)) {
return { error: 'invalid', reason: 'out_of_bounds' }
}
const date = parseDate(value, options.properties)
if (isNaN(date.valueOf())) {
return { error: 'invalid', reason: 'out_of_bounds' }
}
return { value: date }
}
return { error: 'invalid', reason: 'not_a_date' }
return parseCustomValue(value, (value) => DateType(value, { properties: options.properties }))

case Boolean:
if (typeof value === 'boolean') {
return { value }
}
return { error: 'invalid', reason: 'not_a_boolean' }
return parseCustomValue(value, BooleanType)

default:
if (typeof type === 'function') {
return parseCustomValue(value, type)
}
throw new Error(`Unknown schema type: ${type && type.name || type}`)
throw new Error(`Unsupported schema type: ${type && type.name || type}`)
}
}

Expand Down
8 changes: 8 additions & 0 deletions source/types/Boolean.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import InvalidError from './InvalidError.js'

/**
 * Schema type parser for boolean cells.
 *
 * @param {*} value - Raw cell value.
 * @returns {boolean} The same value, when it already is a boolean.
 * @throws {InvalidError} With reason `not_a_boolean` for any non-boolean value.
 */
export default function BooleanType(value) {
  // No coercion is attempted: only real booleans are accepted.
  if (typeof value !== 'boolean') {
    throw new InvalidError('not_a_boolean')
  }
  return value
}
28 changes: 28 additions & 0 deletions source/types/Date.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import parseDate from '../read/parseDate.js'
import InvalidError from './InvalidError.js'

/**
 * Schema type parser for date cells.
 *
 * XLSX has no specific format for dates.
 * Sometimes a date can be heuristically detected, so a date cell may arrive
 * here either as a `Date` instance or as a plain number.
 * https://github.com/catamphetamine/read-excel-file/issues/3#issuecomment-395770777
 *
 * @param {*} value - Raw cell value.
 * @param {object} [options] - Defaults to `{}` so that the function can be
 *   called with just a value without throwing on destructuring.
 * @param {*} [options.properties] - Forwarded as the second argument of
 *   `parseDate()` when `value` is a number.
 * @returns {Date} The original `Date`, or the result of `parseDate()`.
 * @throws {InvalidError} `out_of_bounds` for an invalid `Date`, an infinite
 *   number or a number that `parseDate()` turns into an invalid date;
 *   `invalid_number` for `NaN`; `not_a_date` for any other kind of value.
 */
export default function DateType(value, { properties } = {}) {
  if (value instanceof Date) {
    // An "Invalid Date" has a `NaN` timestamp.
    if (isNaN(value.valueOf())) {
      throw new InvalidError('out_of_bounds')
    }
    return value
  }
  if (typeof value === 'number') {
    if (isNaN(value)) {
      throw new InvalidError('invalid_number')
    }
    // `NaN` has been handled above, so only `Infinity` / `-Infinity` get here.
    if (!isFinite(value)) {
      throw new InvalidError('out_of_bounds')
    }
    const date = parseDate(value, properties)
    if (isNaN(date.valueOf())) {
      throw new InvalidError('out_of_bounds')
    }
    return date
  }
  throw new InvalidError('not_a_date')
}
12 changes: 11 additions & 1 deletion source/types/Email.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
export default function Email() {}
import InvalidError from './InvalidError.js'

/**
 * Schema type parser for email cells.
 *
 * @param {*} value - Raw cell value.
 * @returns {string} The same string, when `isEmail()` accepts it.
 * @throws {InvalidError} `not_a_string` for non-string values,
 *   `not_an_email` for strings rejected by `isEmail()`.
 */
export default function Email(value) {
  if (typeof value !== 'string') {
    throw new InvalidError('not_a_string')
  }
  if (!isEmail(value)) {
    throw new InvalidError('not_an_email')
  }
  return value
}

const regexp = /^[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}$/i

Expand Down
11 changes: 10 additions & 1 deletion source/types/Integer.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
export default function Integer() {}
import InvalidError from './InvalidError.js'
import NumberType from './Number.js'

/**
 * Schema type parser for integer cells.
 *
 * The value is first run through `NumberType()`, so any error thrown by it
 * propagates unchanged, and the result is then checked with `isInteger()`.
 *
 * @param {*} value - Raw cell value.
 * @returns {number} The value returned by `NumberType()`.
 * @throws {InvalidError} `not_an_integer` when `isInteger()` rejects the number.
 */
export default function Integer(value) {
  const number = NumberType(value)
  if (isInteger(number)) {
    return number
  }
  throw new InvalidError('not_an_integer')
}

export function isInteger(x) {
// https://stackoverflow.com/questions/14636536/how-to-check-if-a-variable-is-an-integer-in-javascript
Expand Down
6 changes: 6 additions & 0 deletions source/types/InvalidError.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
 * Error thrown by schema type parsers when a cell value is rejected.
 *
 * The `message` is always `'invalid'`; the specific cause is carried
 * in the `reason` property (for example, `'not_a_number'`).
 */
export default class InvalidError extends Error {
  /**
   * @param {string} reason - Machine-readable cause of the rejection.
   */
  constructor(reason) {
    super('invalid')
    // Without this, instances report the inherited name `Error`
    // in stack traces and in `String(error)`.
    this.name = 'InvalidError'
    this.reason = reason
  }
}
38 changes: 38 additions & 0 deletions source/types/Number.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import InvalidError from './InvalidError.js'

/**
 * Schema type parser for numeric cells.
 *
 * @param {*} value - Raw cell value: a number, or a string holding a number.
 * @returns {number} A finite number.
 * @throws {InvalidError} `not_a_number` for values that are neither numbers
 *   nor strings, and for strings that don't round-trip through `Number()`;
 *   `invalid_number` for `NaN`; `out_of_bounds` for `Infinity` / `-Infinity`.
 */
export default function NumberType(value) {
  let number = value
  if (typeof value === 'string') {
    // Spreadsheet editors guess cell types from what the user has typed,
    // and that guess may leave a numeric value in a string-type cell.
    // Such a string is converted to a number here, but only when
    // converting the number back yields exactly the same string.
    number = Number(value)
    if (String(number) !== value) {
      throw new InvalidError('not_a_number')
    }
  } else if (typeof value !== 'number') {
    throw new InvalidError('not_a_number')
  }
  // From here on, `number` is guaranteed to be of type "number".
  if (Number.isNaN(number)) {
    throw new InvalidError('invalid_number')
  }
  // `NaN` has been rejected above, so this only filters out
  // `Infinity` and `-Infinity`.
  if (!Number.isFinite(number)) {
    throw new InvalidError('out_of_bounds')
  }
  return number
}
34 changes: 34 additions & 0 deletions source/types/String.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import InvalidError from './InvalidError.js'

/**
 * Schema type parser for string cells.
 *
 * @param {*} value - Raw cell value: a string, or a number to be stringified.
 * @returns {string} The string itself, or `String(value)` for a finite number.
 * @throws {InvalidError} `invalid_number` for `NaN`, `out_of_bounds` for
 *   `Infinity` / `-Infinity`, `not_a_string` for any other kind of value.
 */
export default function StringType(value) {
  switch (typeof value) {
    case 'string':
      return value

    case 'number':
      // Excel tends to force an automatic conversion of string-type values
      // to number-type ones as the user inputs them, because users won't
      // bother choosing a "numeric" cell type for each cell by hand.
      //
      // So when a cell is supposed to hold a string and a number has
      // arrived instead, the number is converted back to a string.
      if (Number.isNaN(value)) {
        throw new InvalidError('invalid_number')
      }
      // `NaN` has been rejected above, so this only filters out
      // `Infinity` and `-Infinity`.
      if (!Number.isFinite(value)) {
        throw new InvalidError('out_of_bounds')
      }
      return String(value)

    default:
      throw new InvalidError('not_a_string')
  }
}
12 changes: 11 additions & 1 deletion source/types/URL.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
export default function URL() {}
import InvalidError from './InvalidError.js'

/**
 * Schema type parser for URL cells.
 *
 * @param {*} value - Raw cell value.
 * @returns {string} The same string, when `isURL()` accepts it.
 * @throws {InvalidError} `not_a_string` for non-string values,
 *   `not_a_url` for strings rejected by `isURL()`.
 */
export default function URL(value) {
  if (typeof value !== 'string') {
    throw new InvalidError('not_a_string')
  }
  if (!isURL(value)) {
    throw new InvalidError('not_a_url')
  }
  return value
}

// URL regexp explanation:
//
Expand Down

0 comments on commit 863ea02

Please sign in to comment.