-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
125 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
{ | ||
"extraScripts": [] | ||
"extraScripts": [ | ||
"index/pdf.ts", | ||
"database.ts" | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import { Database, sqlite3 } from 'sqlite3'; | ||
|
||
// Version number of the index schema. initDb compares the version stored in
// the settings table against this constant and writes it back after a rebuild.
const SCHEMA_VERSION = 1; | ||
|
||
/**
 * Runs a row-returning SQL statement and resolves with all result rows.
 *
 * Uses `Database#all` so that the callback actually receives the rows;
 * `Database#run` only reports completion and never hands rows to its
 * callback, which made every lookup resolve to `undefined`.
 *
 * @param db - sqlite3 database handle to run the statement on.
 * @param query - SQL text, optionally containing `?` placeholders.
 * @param params - Values bound to the placeholders, in order.
 * @returns Promise resolving to the array of rows (empty when nothing
 *   matches); rejects with the error reported by sqlite3.
 */
function query(db: Database, query: string, ...params: unknown[]): Promise<unknown[]> {
  return new Promise<unknown[]>((resolve, reject) => {
    db.all(query, params, (err: Error | null, rows: unknown[]) => {
      if (err) {
        reject(err);
      } else {
        resolve(rows);
      }
    });
  });
}
|
||
/**
 * Executes a SQL statement through `db.run` and reports only success or failure.
 *
 * @param db - sqlite3 database handle to run the statement on.
 * @param query - SQL text, optionally containing `?` placeholders.
 * @param params - Values bound to the placeholders, in order.
 * @returns Promise resolving to `true` once the statement has completed.
 *   On failure it rejects with the error's message string (not the Error
 *   object itself).
 */
function run(db: Database, query, ...params) {
  return new Promise((settle, fail) => {
    const onFinished = (failure) => {
      if (!failure) {
        settle(true);
        return;
      }
      // Callers receive the plain message text as the rejection reason.
      fail(failure.message);
    };
    db.run(query, params, onFinished);
  });
}
|
||
/**
 * Reads the schema version recorded in the `settings` table.
 *
 * @param db - sqlite3 database handle; the `settings` table must exist.
 * @returns Promise resolving to the stored version as a number, or
 *   `undefined` when no version row exists or its value is not numeric.
 */
function readSchemaVersion(db: Database): Promise<number | undefined> {
  return new Promise<number | undefined>((resolve, reject) => {
    db.get(
      'SELECT value FROM settings WHERE name = ?',
      ['version'],
      (err: Error | null, row?: { value?: unknown }) => {
        if (err) {
          reject(err);
          return;
        }
        if (row == null || row.value == null) {
          resolve(undefined);
          return;
        }
        // `value` is a TEXT column, so the version comes back as a string.
        const stored = Number(row.value);
        resolve(Number.isNaN(stored) ? undefined : stored);
      },
    );
  });
}

/**
 * Opens `<path>/resources.sqlite` and makes sure the index schema is current.
 *
 * The `settings` table is created if missing. When the stored schema version
 * differs from `SCHEMA_VERSION` (or no version is stored), the full-text table
 * is dropped and recreated, `index_time` is created/emptied, the database is
 * vacuumed and the new version is recorded. When the versions match, the
 * existing index is left untouched.
 *
 * Previously the raw query result was compared to the numeric constant, which
 * never matched, so the index was wiped on every start.
 *
 * @param path - Directory that contains (or will contain) `resources.sqlite`.
 * @param sqlite3 - The sqlite3 module object providing the `Database` constructor.
 * @returns Promise resolving to the opened database handle.
 */
async function initDb(path: string, sqlite3: sqlite3): Promise<Database> {
  const db: Database = new sqlite3.Database(`${path}/resources.sqlite`);
  await run(db, 'CREATE TABLE IF NOT EXISTS settings (name TEXT PRIMARY KEY, value TEXT)');

  const storedVersion = await readSchemaVersion(db);
  if (storedVersion !== SCHEMA_VERSION) {
    // rebuild index
    await run(db, 'DROP TABLE IF EXISTS resources_fts');
    await run(db, 'CREATE VIRTUAL TABLE IF NOT EXISTS resources_fts USING fts5(id, title, text)');

    // create or clean index_time
    await run(db, 'CREATE TABLE IF NOT EXISTS index_time (id TEXT PRIMARY KEY, index_time INTEGER)');
    await run(db, 'DELETE FROM index_time');

    await run(db, 'VACUUM');

    // Bind the version instead of splicing it into the SQL text.
    await run(
      db,
      "INSERT INTO settings VALUES('version', ?) ON CONFLICT(name) DO UPDATE SET value = excluded.value",
      SCHEMA_VERSION,
    );
  }
  return db;
}
|
||
/**
 * Adds a resource to the full-text index (replacing any earlier entry with
 * the same id) and records the time it was indexed.
 *
 * All statements are queued synchronously, inside `db.serialize`, so they run
 * in order even if the caller does not wait for the returned promise.
 *
 * @param db - sqlite3 database handle holding `resources_fts` and `index_time`.
 * @param title - Resource title stored in the index.
 * @param id - Resource identifier; key of the `index_time` row.
 * @param text - Full text of the resource to index.
 * @returns Promise that resolves once all statements have completed and
 *   rejects with the first sqlite3 error. Callers that ignore the promise
 *   should attach a `.catch` to avoid unhandled rejections.
 */
function addToIndex(db: Database, title: string, id: string, text: string): Promise<void> {
  // Whole seconds, computed once: index_time is an INTEGER column and the
  // insert and the conflict update must store the same value.
  const indexedAt = Math.floor(Date.now() / 1000);

  const pending: Promise<void>[] = [];
  const enqueue = (sql: string, params: unknown[]): void => {
    pending.push(
      new Promise<void>((resolve, reject) => {
        db.run(sql, params, (err: Error | null) => {
          if (err) {
            reject(err);
          } else {
            resolve();
          }
        });
      }),
    );
  };

  db.serialize(() => {
    // index_time is upserted per id, so re-indexing is expected; remove the
    // old FTS row first so a resource never appears twice in search results.
    enqueue('DELETE FROM resources_fts WHERE id = ?', [id]);
    enqueue('INSERT INTO resources_fts VALUES(?, ?, ?)', [id, title, text]);
    enqueue(
      'INSERT INTO index_time VALUES(?, ?) ON CONFLICT(id) DO UPDATE SET index_time = excluded.index_time',
      [id, indexedAt],
    );
  });

  return Promise.all(pending).then(() => undefined);
}
|
||
export { initDb, run, query, addToIndex }; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import * as pdfjs from 'pdfjs-dist/es5/build/pdf.js'; | ||
import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.entry'; | ||
|
||
// Point pdf.js at the worker entry imported from pdfjs-dist above.
pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorker; | ||
|
||
// Matches text that is a lone letter, or that ends in a lone letter after
// whitespace; no space is inserted after such a fragment.
const SINGLE_LETTER_TAIL = /^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/;

/**
 * Joins the text items of one page into a single string.
 *
 * Between two consecutive items (unless the previous one already ends with a
 * space): a lower `transform[5]` (vertical position) starts a new line with
 * "\r\n"; otherwise a different `transform[4]` (horizontal position) inserts
 * a space, except after a single-letter tail.
 */
function joinPageItems(items: ReadonlyArray<{ str: string; transform: number[] }>): string {
  let pageText = '';
  let previous: { str: string; transform: number[] } | null = null;
  for (const item of items) {
    if (previous !== null && !previous.str.endsWith(' ')) {
      // In a pdf.js transform, index 4 is the x translation and index 5 the y translation.
      const x = item.transform[4];
      const previousX = previous.transform[4];
      const y = item.transform[5];
      const previousY = previous.transform[5];
      if (y < previousY) {
        pageText += '\r\n';
      } else if (x !== previousX && !SINGLE_LETTER_TAIL.test(previous.str)) {
        pageText += ' ';
      }
    }
    pageText += item.str;
    previous = item;
  }
  return pageText;
}

/**
 * Extracts the text of a PDF document, page by page.
 *
 * Each page's text is followed by a blank line ("\n\n"). Pages whose text
 * content has no `items` are skipped instead of corrupting the output (the
 * previous page bookkeeping appended the string "undefined" and dropped later
 * pages in that case). The document is destroyed when extraction finishes or
 * fails so its resources are released.
 *
 * @param path - Document source handed unchanged to `pdfjs.getDocument`.
 * @returns Promise resolving to the concatenated text of all pages, or `''`
 *   for a document without pages.
 */
async function pdfToText(path): Promise<string> {
  const pdf = await pdfjs.getDocument(path).promise;
  try {
    const pageTexts: string[] = [];
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      const page = await pdf.getPage(pageNumber);
      const textContent = await page.getTextContent();
      if (textContent.items == null) {
        continue;
      }
      pageTexts.push(joinPageItems(textContent.items) + '\n\n');
    }
    return pageTexts.join('');
  } finally {
    await pdf.destroy();
  }
}
|
||
export { pdfToText }; |