Skip to content
This repository was archived by the owner on Feb 15, 2022. It is now read-only.

Commit 7f67e23

Browse files
Removed Default Convert Options
1 parent 1fda6f9 commit 7f67e23

File tree

5 files changed

+48
-21
lines changed

5 files changed

+48
-21
lines changed

coverage/badge-branches.svg

Lines changed: 1 addition & 1 deletion
Loading

coverage/badge-lines.svg

Lines changed: 1 addition & 1 deletion
Loading

coverage/badge-statements.svg

Lines changed: 1 addition & 1 deletion
Loading

src/index.ts

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -145,13 +145,6 @@ class Ocr {
145145
args.push('-density');
146146
args.push(`${options && options.convertDensity ? options.convertDensity : 300}`);
147147
args.push(filePath);
148-
args.push('-depth');
149-
args.push('8');
150-
args.push('-strip');
151-
args.push('-background');
152-
args.push('white');
153-
args.push('-alpha');
154-
args.push('off');
155148
if (options && options.convertArgs) {
156149
// Parse all provided options to command line arguments
157150
for (const [key, value] of Object.entries(options.convertArgs)) {

test/extract-text.spec.ts

Lines changed: 45 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,18 @@ import * as temp from 'temp';
33

44
import { ExtractTextOptions, Ocr } from '../src/index';
55

6+
const convertArgs = {
7+
quality: '100',
8+
trim: '',
9+
depth: '8',
10+
strip: '',
11+
background: 'white',
12+
alpha: 'off'
13+
};
14+
15+
// tslint:disable object-literal-key-quotes
16+
const tesseractArgs = { '-psm': 6, c: 'preserve_interword_spaces=1' };
17+
618
describe('Extract Text Tests', () => {
719
it('should be able to extract pdf text from single-page.pdf', async (done) => {
820
jest.setTimeout(15 * 1000);
@@ -11,7 +23,10 @@ describe('Extract Text Tests', () => {
1123
const pdfPath = path.join(__dirname, relativePath);
1224

1325
try {
14-
const result: string = await Ocr.extractText(pdfPath);
26+
const options: ExtractTextOptions = {
27+
convertArgs
28+
};
29+
const result: string = await Ocr.extractText(pdfPath, options);
1530
expect(result).toBeDefined();
1631
expect(result).toContain('00001-001-0002');
1732
} catch (error) {
@@ -29,7 +44,10 @@ describe('Extract Text Tests', () => {
2944
const pdfPath = path.join(__dirname, relativePath);
3045

3146
try {
32-
const result: string = await Ocr.extractText(pdfPath);
47+
const options: ExtractTextOptions = {
48+
convertArgs
49+
};
50+
const result: string = await Ocr.extractText(pdfPath, options);
3351
expect(result).toBeDefined();
3452
expect(result).toContain('00001-001-0002');
3553
} catch (error) {
@@ -41,13 +59,16 @@ describe('Extract Text Tests', () => {
4159
});
4260

4361
it('should be able to extract pdf text from multi-page.pdf', async (done) => {
44-
jest.setTimeout(15 * 1000);
62+
jest.setTimeout(25 * 1000);
4563
const fileName = 'multi-page.pdf';
4664
const relativePath = path.join('sample', fileName);
4765
const pdfPath = path.join(__dirname, relativePath);
4866

4967
try {
50-
const options: ExtractTextOptions = { convertDensity: 400, convertArgs: { trim: '' } };
68+
const options: ExtractTextOptions = {
69+
convertDensity: 600,
70+
convertArgs
71+
};
5172
const result: string = await Ocr.extractText(pdfPath, options);
5273
expect(result).toBeDefined();
5374
expect(result).toContain('National Airspace System');
@@ -67,7 +88,11 @@ describe('Extract Text Tests', () => {
6788
const pdfPath = path.join(__dirname, relativePath);
6889

6990
try {
70-
const options: ExtractTextOptions = { pdfToTextArgs: { f: 1, l: 4 } };
91+
const options: ExtractTextOptions = {
92+
pdfToTextArgs: { f: 1, l: 4 },
93+
convertDensity: 600,
94+
convertArgs
95+
};
7196
const result: string = await Ocr.extractText(pdfPath, options);
7297
expect(result).toBeDefined();
7398
expect(result).toContain('TraceMonkey');
@@ -81,17 +106,19 @@ describe('Extract Text Tests', () => {
81106
});
82107

83108
it('should be able to extract text from sample.png', async (done) => {
84-
jest.setTimeout(15 * 1000);
109+
jest.setTimeout(25 * 1000);
85110
const fileName = 'sample.png';
86111
const relativePath = path.join('sample', fileName);
87112
const pngPath = path.join(__dirname, relativePath);
88113

89114
try {
90115
const tmpDir = temp.mkdirSync('tmp');
91-
// tslint:disable object-literal-key-quotes
116+
92117
const options: ExtractTextOptions = {
118+
convertDensity: 600,
119+
convertArgs,
93120
tesseractLang: 'eng',
94-
tesseractArgs: { '-psm': 6, c: 'preserve_interword_spaces=1' }
121+
tesseractArgs
95122
};
96123
const result: string = await Ocr.invokeImageOcr(tmpDir, pngPath, options);
97124
expect(result).toBeDefined();
@@ -105,17 +132,24 @@ describe('Extract Text Tests', () => {
105132
});
106133

107134
it('should be able to extract text from sample-low.jpg', async (done) => {
108-
jest.setTimeout(15 * 1000);
135+
jest.setTimeout(25 * 1000);
109136
const fileName = 'sample-low.jpg';
110137
const relativePath = path.join('sample', fileName);
111138
const jpgPath = path.join(__dirname, relativePath);
112139

113140
try {
114141
const options: ExtractTextOptions = {
115142
convertDensity: 600,
116-
convertArgs: { trim: '' },
143+
convertArgs: {
144+
...convertArgs,
145+
verbose: '',
146+
flatten: '',
147+
contrast: '',
148+
'auto-level': '',
149+
sharpen: '0x4.0'
150+
},
117151
tesseractLang: 'eng',
118-
tesseractArgs: { '-psm': 6 }
152+
tesseractArgs
119153
};
120154
const result: string = await Ocr.extractText(jpgPath, options);
121155
expect(result).toBeDefined();

0 commit comments

Comments
 (0)