Skip to content

Commit

Permalink
Add Evernote Import Parser (#940)
Browse files Browse the repository at this point in the history
* Adding core importer.

* Adding trashedNotes import.

* First WIP of evernote importing

Todo: Tags!

* Pull in the systemTags if they're available (for markdown)

Cleaner (and fixed!) check for no activeNotes or trashedNotes props.

* Also import the tags to the tag bucket, if there are any present.

* Added a boolean argument to determine whether an imported note should be marked deleted. Probably a better way to do this?

* README cleanup!

* Use fat arrow instead of `bind` in tests.

* Adding an optional argument to `importNotes` to signify which importer the note came from.

* Adding support for tags, and pass tagBucket through to the Simplenote importer.

* Cleaning up importer class, configured for file path to be passed in.

* Removing importedFrom property because the note schema doesn't allow it. Fixed bug where creationDate could be `NaN`.

* Cleaning up app.jsx from my testing UI :)

* Adding `pinned` and `markdown` props to the systemTags if present.

Fixed date parsing to get correct timestamp.

* Updated to a class that emits status events for things like progress or errors encountered.

* Use `importNote` from the core importer so we don't have to create a large array.

* Wrapping the core importer in a class, which also now emits errors instead of throwing.

* Use the new `CoreImporter` class.

* Add a default empty tags array if none was present in the imported note.

* Added some formatting fixes for evernote imports.

* Tweaked `importEvernotes` to take an array of `File` objects. We only process the first file in the array for this importer.

* * Renamed `importEvernotes` to `importNotes` (will do the same for text file importer).
* Class constructor takes an object so we can't muck up the bucket assignments.

* Constructor now takes an object so we don't muck up the bucket assignments.

* Add a markdown override option to the core importer.

* Add an options object to be passed to `importNote` of the core importer.

* Add fallback object to fix exception if no options are passed.

* Updating `fs` to be null by default to fix browser support.
  • Loading branch information
roundhill authored and mirka committed Nov 2, 2018
1 parent 6be0efa commit cea1869
Show file tree
Hide file tree
Showing 2 changed files with 164 additions and 0 deletions.
163 changes: 163 additions & 0 deletions lib/utils/import/evernote/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import { EventEmitter } from 'events';
import { createStream } from 'sax';
import moment from 'moment';
import CoreImporter from '../';
import { endsWith, get, has } from 'lodash';

// The importer treats a `process.type` property on `window` as the signal
// that it is running under Electron. Only then is Node's `fs` module loaded
// (through `__non_webpack_require__`); everywhere else `fs` stays null.
const isElectron = has(window, 'process.type');
const fs = isElectron
  ? __non_webpack_require__('fs') // eslint-disable-line no-undef
  : null;

/**
 * Imports notes from an Evernote `.enex` export file.
 *
 * The file is streamed through a SAX parser; every completed `<note>` element
 * is handed to the core importer. Nothing is thrown to the caller: all
 * feedback is delivered through `status` events emitted on this instance:
 *
 *   emit('status', 'progress', importedNoteCount)
 *   emit('status', 'complete', importedNoteCount)
 *   emit('status', 'error', message)
 */
class EvernoteImporter extends EventEmitter {
  /**
   * @param {Object} args
   * @param {Object} args.noteBucket - Bucket passed through to the core importer.
   * @param {Object} args.tagBucket - Bucket passed through to the core importer.
   * @param {Object} [args.options] - Options forwarded to `coreImporter.importNote`.
   */
  constructor({ noteBucket, tagBucket, options }) {
    super();
    this.noteBucket = noteBucket;
    this.tagBucket = tagBucket;
    this.options = options;
  }

  /**
   * Import the notes contained in an Evernote export.
   *
   * @param {Array<{path: string}>} filesArray - File objects; only the first
   *   entry is processed and it must have a `path` ending in `.enex`.
   * @returns {void} Results are reported through `status` events.
   */
  importNotes = filesArray => {
    if (!filesArray || filesArray.length === 0) {
      this.emit('status', 'error', 'Invalid Evernote export file.');
      return;
    }

    // We will always process only the first item in the array
    const file = filesArray[0];
    if (!file || !file.path) {
      this.emit('status', 'error', 'Could not find Evernote export file.');
      return;
    }

    if (!endsWith(file.path.toLowerCase(), '.enex')) {
      this.emit('status', 'error', 'An Evernote .enex file is required.');
      return;
    }

    // `fs` is only loaded under Electron; report instead of throwing elsewhere.
    if (!fs) {
      this.emit('status', 'error', 'Error processing Evernote data.');
      return;
    }

    const saxStream = createStream(true, false);
    const parser = new DOMParser();
    const coreImporter = new CoreImporter({
      noteBucket: this.noteBucket,
      tagBucket: this.tagBucket,
    });
    // The note currently being parsed. Seeded with `tags` so stray <tag>
    // text seen before any <note> opens cannot throw.
    let currentNote = { tags: [] };
    let importedNoteCount = 0;

    // Arrow function so `this` is the importer, not the sax stream.
    saxStream.on('error', () => {
      this.emit('status', 'error', 'Error processing Evernote data.');
    });

    saxStream.on('opentag', node => {
      // The <note> tag signifies that we should parse another note
      if (node.name === 'note') {
        currentNote = { tags: [] };
      }
    });

    saxStream.on('cdata', text => {
      // Note content in evernote exports lives in CDATA
      const htmlDoc = parser.parseFromString(text, 'text/html');

      // We're only interested in 'note' doctypes, like 'en-note'
      if (!endsWith(get(htmlDoc, 'doctype.name', ''), 'note')) {
        return;
      }

      const strippedText = this.htmlToPlainText(htmlDoc.documentElement);
      if (strippedText !== '') {
        // Append below the title when there is one; a note without a title
        // must not end up prefixed with the string "undefined".
        currentNote.content = currentNote.content
          ? currentNote.content + '\n' + strippedText
          : strippedText;
      }
    });

    saxStream.on('text', text => {
      if (!text) {
        return;
      }

      // NOTE(review): relies on sax's internal `_parser.tagName` to learn
      // which element the text belongs to.
      const tagName = saxStream._parser.tagName;
      switch (tagName) {
        case 'title':
          // Evernote titles appear to be plain text only, we can take it as-is
          currentNote.content = text;
          break;
        // We need to convert the date to a Unix timestamp
        case 'created':
          currentNote.creationDate = this.getConvertedDate(text);
          break;
        case 'updated':
          currentNote.modificationDate = this.getConvertedDate(text);
          break;
        case 'tag':
          currentNote.tags.push(text);
          break;
      }
    });

    // sax passes the closing tag's name (a string) to `closetag` listeners.
    saxStream.on('closetag', tagName => {
      if (tagName === 'note') {
        coreImporter.importNote(currentNote, this.options);
        importedNoteCount++;
        this.emit('status', 'progress', importedNoteCount);
      }
    });

    saxStream.on('end', () => {
      if (importedNoteCount === 0) {
        this.emit('status', 'error', 'No notes were found to import.');
        return;
      }

      this.emit('status', 'complete', importedNoteCount);
    });

    // Read the file via stream; surface read failures (missing file,
    // permissions) as a status error instead of an unhandled stream error.
    const readStream = fs.createReadStream(file.path);
    readStream.on('error', () => {
      this.emit('status', 'error', 'Could not find Evernote export file.');
    });
    readStream.pipe(saxStream);
  };

  /**
   * Convert a date string from the export into a Unix timestamp in seconds.
   *
   * @param {string} dateString - Date text taken from the export.
   * @returns {number} Seconds since the epoch; the current time when the
   *   input cannot be parsed.
   */
  getConvertedDate = dateString => {
    const convertedDate = moment(dateString).unix();
    if (Number.isNaN(convertedDate)) {
      // Fall back to the current date, in seconds to match `moment().unix()`
      return Math.floor(Date.now() / 1000);
    }

    return convertedDate;
  };

  // From: https://stackoverflow.com/a/44952893
  // Modified to work properly with Evernote HTML formatting
  /**
   * Recursively flatten a DOM node into plain text, starting a new line
   * before block-level elements.
   *
   * @param {Node} n - Node to convert.
   * @param {boolean} [isInnerNode] - True for recursive calls; only inner
   *   `div`s introduce a line break.
   * @returns {string} Plain-text rendering of the node.
   */
  htmlToPlainText = (n, isInnerNode) => {
    const TEXT_NODE = 3;
    // Element names are upper-case in HTML documents, so normalise first.
    const nodeName = n.nodeName.toLowerCase();

    // Skip `media` tags (like <en-media>)
    if (endsWith(nodeName, 'media')) {
      return '';
    }

    if (n.nodeType === TEXT_NODE) {
      return n.nodeValue;
    }

    const startsNewLine =
      (isInnerNode && nodeName === 'div') ||
      /^h[1-6]$/.test(nodeName) ||
      nodeName === 'p' ||
      nodeName === 'ul' ||
      nodeName === 'li' ||
      nodeName === 'br';

    let plainText = startsNewLine ? '\n' : '';
    for (let i = 0; i < n.childNodes.length; i++) {
      plainText += this.htmlToPlainText(n.childNodes[i], true);
    }

    return plainText;
  };
}

export default EvernoteImporter;
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@
"redux-localstorage": "0.4.1",
"redux-thunk": "2.2.0",
"sanitize-filename": "1.6.1",
"sax": "1.2.4",
"showdown": "1.8.6",
"simperium": "0.3.3",
"valid-url": "1.0.9"
Expand Down

0 comments on commit cea1869

Please sign in to comment.