Skip to content

Commit

Permalink
feat: handle empty or really bad csv files
Browse files: browse the repository at this point in the history
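Includes a refactor that saves a full pass over the CSV file: the first record and the total record count now come from a single parse (`validateWithStats`) instead of separate sample and count reads.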
  • Loading branch information
stdavis authored and steveoh committed Feb 9, 2022
1 parent b951ff2 commit 5480091
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 46 deletions.
1 change: 1 addition & 0 deletions src/components/GeocodeContext.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const reducer = (draft, action) => {
draft.data.street = action.payload.street;
draft.data.zone = action.payload.zone;
draft.data.sampleData = action.payload.sampleData;
draft.data.totalRecords = action.payload.totalRecords;
break;
case 'UPDATE_FIELDS':
draft.data[action.meta] = action.payload;
Expand Down
11 changes: 6 additions & 5 deletions src/components/InvalidCsv.jsx
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
const errors = {
CSV_RECORD_INCONSISTENT_COLUMNS:
'that a record did not contain the same amount of fields as the previous record. Somewhere in your file, a row is missing or has extra field delimiters.',
CSV_INVALID_CLOSING_QUOTE: 'a quote in an unexpected location. Please check the quotes in your CSV file.',
'that a record did not contain the same amount of fields as the previous record. Somewhere in the file, a row is missing or has extra field delimiters.',
CSV_INVALID_CLOSING_QUOTE: 'a quote in an unexpected location. Please check the quotes in the CSV file.',
CSV_RECORD_INCONSISTENT_FIELDS_LENGTH:
'that a record did not contain the same amount of fields as the previous record. Somewhere in your file, a row is missing or has extra field delimiters.',
'that a record did not contain the same amount of fields as the previous record. Somewhere in the file, a row is missing or has extra field delimiters.',
CSV_RECORD_DONT_MATCH_COLUMNS_LENGTH:
'that a record did not contain the same amount of columns. Somewhere in your file, a row is missing or has extra field delimiters.',
CSV_QUOTE_NOT_CLOSED: 'an open quote that was not closed. Please check the quotes in your CSV file.',
'that a record did not contain the same amount of columns. Somewhere in the file, a row is missing or has extra field delimiters.',
CSV_QUOTE_NOT_CLOSED: 'an open quote that was not closed. Please check the quotes in the CSV file.',
INVALID_OR_EMPTY_FILE: 'no records. The file is empty or invalid.',
};

export const CSV_PARSE_ERROR = 'CSV_PARSE_ERROR';
Expand Down
9 changes: 5 additions & 4 deletions src/pages/Data.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ export default function Data() {

const file = files[0];
setError();
let newSample;
let stats;
try {
newSample = await window.ugrc.getSampleFromFile(file.path);
stats = await window.ugrc.validateWithStats(file.path);
} catch (e) {
const errorDetails = [];
if (e.message.includes(CSV_PARSE_ERROR)) {
Expand All @@ -62,7 +62,7 @@ export default function Data() {
handleError(e);
}

const fields = Object.keys(newSample);
const fields = Object.keys(stats.firstRecord);

geocodeDispatch({
type: 'UPDATE_FILE',
Expand All @@ -71,7 +71,8 @@ export default function Data() {
fieldsFromFile: fields,
street: chooseCommonFieldName('street', fields, commonFieldNames.current),
zone: chooseCommonFieldName('zone', fields, commonFieldNames.current),
sampleData: newSample,
sampleData: stats.firstRecord,
totalRecords: stats.totalRecords,
},
});
};
Expand Down
12 changes: 2 additions & 10 deletions src/pages/Plan.jsx
Original file line number Diff line number Diff line change
@@ -1,19 +1,11 @@
import { useEffect, useState } from 'react';
import humanizeDuration from 'humanize-duration';
import { Link, useHistory } from 'react-router-dom';
import { useErrorHandler } from 'react-error-boundary';
import { useGeocodeContext } from '../components/GeocodeContext';

export default function Plan() {
const { geocodeContext } = useGeocodeContext();
const [rows, setRows] = useState(0);
const history = useHistory();
const duration = (rows / 3) * 1000;
const handleError = useErrorHandler();

useEffect(() => {
window.ugrc.getRecordCount(geocodeContext.data.file.path).then(setRows).catch(handleError);
}, [geocodeContext.data.file.path, handleError]);
const duration = (geocodeContext.data.totalRecords / 3) * 1000;

const start = () => {
history.push('/geocode');
Expand All @@ -27,7 +19,7 @@ export default function Plan() {
<h2>The plan</h2>
<section className="flex items-stretch justify-around w-full mb-10 text-center border divide-x-2 divide-gray-100 rounded-lg shadow-lg">
<div className="flex-1 p-6">
<h2 className="my-0 text-indigo-600">{rows}</h2>
<h2 className="my-0 text-indigo-600">{geocodeContext.data.totalRecords}</h2>
<span className="block text-base text-gray-400">addresses</span>
</div>
<div className="flex-1 p-6">
Expand Down
36 changes: 13 additions & 23 deletions src/services/csv.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,20 @@ const fs = require('fs');
import { parse } from 'csv-parse';
import { CSV_PARSE_ERROR } from '../components/InvalidCsv.jsx';

export const getDataSample = async (filePath) => {
const parser = parse({ columns: true, skipEmptyLines: true });

try {
const parsed = fs.createReadStream(filePath).pipe(parser);

//* read the first line to get the file structure
for await (const record of parsed) {
return record;
}
} catch (parseError) {
throw new Error(`${CSV_PARSE_ERROR}: {${parseError.code}} {${parseError.message}}`);
}
};

export const getRecordCount = (filePath) => {
export const validateWithStats = (filePath) => {
return new Promise((resolve, reject) => {
const parser = parse({ columns: true }, function (parseError, data) {
reject(`${CSV_PARSE_ERROR}: {${parseError.code}} {${parseError.message}}`);
if (parseError) {
reject(`${CSV_PARSE_ERROR}: {${parseError.code}} {${parseError.message}}`);

return;
}

resolve(data.length);
if (data.length === 0) {
reject(new Error(`${CSV_PARSE_ERROR}: {INVALID_OR_EMPTY_FILE} {No records found in your file.}`));
}

resolve({ firstRecord: data[0], totalRecords: data.length });
});

try {
Expand All @@ -34,9 +27,6 @@ export const getRecordCount = (filePath) => {
});
};

ipcMain.handle('getSampleFromFile', (_, content) => {
return getDataSample(content);
});
ipcMain.handle('getRecordCount', (_, content) => {
return getRecordCount(content);
ipcMain.handle('validateWithStats', (_, content) => {
return validateWithStats(content);
});
4 changes: 2 additions & 2 deletions src/services/geocode.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ const md5 = require('md5');
import { parse } from 'csv-parse';
import { stringify } from 'csv-stringify';
import got from 'got';
import { getRecordCount } from './csv.js';
import { validateWithStats } from './csv.js';
import { trackEvent } from './analytics.js';

const SPACES = / +/;
Expand Down Expand Up @@ -93,7 +93,7 @@ export const geocode = async (event, { filePath, fields, apiKey, wkid = 26912, s
const stringifier = stringify({ columns: [...columns, 'x', 'y', 'score', 'match_address'], header: true });
stringifier.pipe(writer);

let totalRows = await getRecordCount(filePath);
let totalRows = (await validateWithStats(filePath)).totalRecords;
let rowsProcessed = 0;
let totalScore = 0;
let failures = 0;
Expand Down
3 changes: 1 addition & 2 deletions src/services/preload.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
const { ipcRenderer, contextBridge } = require('electron');

contextBridge.exposeInMainWorld('ugrc', {
getSampleFromFile: (content) => ipcRenderer.invoke('getSampleFromFile', content),
getRecordCount: (content) => ipcRenderer.invoke('getRecordCount', content),
validateWithStats: (content) => ipcRenderer.invoke('validateWithStats', content),
saveConfig: (content) => ipcRenderer.invoke('saveConfig', content),
getConfigItem: (content) => ipcRenderer.invoke('getConfigItem', content),
geocode: (content) => ipcRenderer.invoke('geocode', content),
Expand Down

0 comments on commit 5480091

Please sign in to comment.