forked from gali8/Tesseract-OCR-iOS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTesseract.mm
executable file
·171 lines (133 loc) · 4.58 KB
/
Tesseract.mm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
//
// Tesseract.mm
// Tesseract
//
// Created by Loïs Di Qual on 24/09/12.
// Copyright (c) 2012 Loïs Di Qual.
// Under MIT License. See 'LICENCE' for more information.
//
#import "Tesseract.h"
#import "baseapi.h"
#import "environ.h"
#import "pix.h"
// Forward declaration of the C++ engine class so the private ivar in the
// class extension can name the type.
namespace tesseract {
    class TessBaseAPI;
}
// Private instance storage, kept out of the public header.
@interface Tesseract () {
    // Raw pixel buffer allocated by -setImage: and handed to the engine.
    uint32_t *_pixels;
    // C++ engine instance created in -initWithDataPath:language:.
    tesseract::TessBaseAPI *_tesseract;
}
@end
/**
 * Objective-C++ wrapper around a tesseract::TessBaseAPI instance.
 *
 * The object owns two manually managed resources: the C++ engine
 * (_tesseract, allocated with `new`) and the pixel buffer handed to the
 * engine (_pixels, allocated with `malloc`). Both are released in -dealloc.
 */
@implementation Tesseract

#pragma mark - Lifecycle

/// Returns the version string reported by tesseract::TessBaseAPI::Version().
+ (NSString *)version {
    return [NSString stringWithFormat:@"%s", tesseract::TessBaseAPI::Version()];
}

/**
 * Creates the wrapper, copies the bundled trained data for `language` into the
 * Documents directory and initialises the engine.
 *
 * @param dataPath Path component, relative to the Documents directory, under
 *                 which the trained data is stored; also passed to the engine.
 * @param language Language identifier; also the base name of the bundled
 *                 `.traineddata` resource.
 * @return The initialised object, or nil when the engine fails to initialise.
 */
- (id)initWithDataPath:(NSString *)dataPath language:(NSString *)language {
    self = [super init];
    if (self) {
        _dataPath = [dataPath copy];
        _language = [language copy];
        _variables = [[NSMutableDictionary alloc] init];

        [self copyDataToDocumentsDirectory];

        _tesseract = new tesseract::TessBaseAPI();
        if (![self initEngine]) {
            // Initialisers report failure with nil; -dealloc releases the
            // engine instance allocated above.
            return nil;
        }
    }
    return self;
}

/**
 * Releases the C++ engine and the pixel buffer.
 * NOTE(review): written for ARC (no [super dealloc]) - confirm the target's
 * build settings.
 */
- (void)dealloc {
    // Destroy the engine first: it may still reference the pixel buffer.
    delete _tesseract;
    _tesseract = NULL;

    free(_pixels);
    _pixels = NULL;
}

#pragma mark - Engine setup

/// (Re)initialises the engine with the current data path and language.
/// @return YES when TessBaseAPI::Init reports success (return code 0).
- (BOOL)initEngine {
    int returnCode = _tesseract->Init([_dataPath UTF8String], [_language UTF8String]);
    return returnCode == 0;
}

/**
 * Copies the bundled `<language>.traineddata` file into
 * `<Documents>/<dataPath>` when it is not already there, then points the
 * TESSDATA_PREFIX environment variable at the Documents directory.
 */
- (void)copyDataToDocumentsDirectory {
    NSFileManager *fileManager = [NSFileManager defaultManager];
    NSArray *documentPaths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES);
    NSString *documentPath = ([documentPaths count] > 0) ? [documentPaths objectAtIndex:0] : nil;
    if (documentPath == nil) {
        // Without a Documents directory there is nowhere to copy to, and
        // setenv() must not be handed a NULL value.
        NSLog(@"Tesseract: no Documents directory available");
        return;
    }

    NSString *dataPath = [documentPath stringByAppendingPathComponent:_dataPath];
    NSLog(@"DATAPATH %@", dataPath);

    // Make sure the destination directory exists.
    if (![fileManager fileExistsAtPath:dataPath]) {
        NSError *directoryError = nil;
        if (![fileManager createDirectoryAtPath:dataPath
                    withIntermediateDirectories:YES
                                     attributes:nil
                                          error:&directoryError]) {
            NSLog(@"Tesseract: could not create %@: %@", dataPath, directoryError);
        }
    }

    NSBundle *bundle = [NSBundle bundleForClass:[self class]];
    NSString *tessdataPath = [bundle pathForResource:_language ofType:@"traineddata"];

    // Only build the destination path when the resource exists: appending a
    // nil path component raises an exception.
    if (tessdataPath != nil) {
        NSString *destinationPath = [dataPath stringByAppendingPathComponent:[tessdataPath lastPathComponent]];
        if (![fileManager fileExistsAtPath:destinationPath]) {
            NSError *error = nil;
            NSLog(@"trovato a %@", tessdataPath);
            NSLog(@"lo copio in %@", destinationPath);
            // Judge success by the return value, not by the error pointer.
            if (![fileManager copyItemAtPath:tessdataPath toPath:destinationPath error:&error]) {
                NSLog(@"ERRORE! %@", error.description);
            }
        }
    }

    // The prefix is the Documents directory itself (with a trailing slash),
    // not the data directory created above.
    setenv("TESSDATA_PREFIX", [[documentPath stringByAppendingString:@"/"] UTF8String], 1);
}

#pragma mark - Variables

/**
 * Stores an engine variable and applies it to the engine immediately.
 *
 * Example keys/values:
 *   "tessedit_char_whitelist"                  -> "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 *   "language_model_penalty_non_freq_dict_word" -> "0"
 *   "language_model_penalty_non_dict_word"      -> "0"
 *
 * @param value Variable value. Passing nil only forgets a previously stored
 *              value; the engine is not touched.
 * @param key   Variable name. A nil key is ignored.
 */
- (void)setVariableValue:(NSString *)value forKey:(NSString *)key {
    if (key == nil) {
        return;
    }

    // For a nil value this removes the stored entry (KVC behaviour of
    // NSMutableDictionary), as the original call did.
    [_variables setValue:value forKey:key];

    if (value == nil) {
        // Nothing to hand to the engine; a NULL C string must not be passed.
        return;
    }
    _tesseract->SetVariable([key UTF8String], [value UTF8String]);
}

/// Re-applies every stored variable to the engine.
- (void)loadVariables {
    for (NSString *key in _variables) {
        NSString *value = [_variables objectForKey:key];
        _tesseract->SetVariable([key UTF8String], [value UTF8String]);
    }
}

/**
 * Switches the engine to another language.
 *
 * @param language Language identifier to initialise the engine with.
 * @return YES when the engine was re-initialised successfully, NO otherwise.
 */
- (BOOL)setLanguage:(NSString *)language {
    _language = [language copy];

    // -initEngine returns a BOOL (YES on success), not the raw return code.
    if (![self initEngine]) {
        return NO;
    }

    /*
     * "WARNING: On changing languages, all Tesseract parameters
     * are reset back to their default values."
     */
    [self loadVariables];
    return YES;
}

#pragma mark - Recognition

/// Runs recognition on the current image.
/// @return YES when TessBaseAPI::Recognize reports success (return code 0).
- (BOOL)recognize {
    int returnCode = _tesseract->Recognize(NULL);
    return returnCode == 0;
}

/// Returns the recognised text, or nil when the engine has no text to return.
- (NSString *)recognizedText {
    char *utf8Text = _tesseract->GetUTF8Text();
    if (utf8Text == NULL) {
        return nil;
    }

    NSString *text = [NSString stringWithUTF8String:utf8Text];
    // The buffer is owned by the caller and must be released with delete[].
    delete[] utf8Text;
    return text;
}

/**
 * Renders `image` into a 32-bit bitmap and hands that buffer to the engine.
 *
 * The buffer stays owned by this object (in _pixels) until the next call or
 * -dealloc. When the image has no area, or the bitmap cannot be created, the
 * previous buffer is released and no new image is set.
 *
 * @param image Image to recognise; its point size is used as the bitmap size.
 */
- (void)setImage:(UIImage *)image
{
    // Drop the previous buffer and clear the pointer so an early return
    // below cannot lead to a double free on the next call.
    // NOTE(review): if the engine still points at the previous buffer it
    // should be cleared as well - confirm against the linked Tesseract version.
    free(_pixels);
    _pixels = NULL;

    CGSize size = [image size];
    int width = size.width;
    int height = size.height;
    if (width <= 0 || height <= 0) {
        return;
    }

    // Size arithmetic in size_t to avoid int overflow on large images.
    size_t bytesPerRow = (size_t)width * sizeof(uint32_t);
    size_t byteCount = bytesPerRow * (size_t)height;

    _pixels = (uint32_t *)malloc(byteCount);
    if (_pixels == NULL) {
        return;
    }
    // Clear the pixels so any transparency is preserved.
    memset(_pixels, 0, byteCount);

    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();

    // Bitmap context backed directly by _pixels.
    CGContextRef context = CGBitmapContextCreate(_pixels, width, height, 8, bytesPerRow, colorSpace,
                                                 kCGBitmapByteOrder32Little | kCGImageAlphaPremultipliedLast);
    if (context == NULL) {
        CGColorSpaceRelease(colorSpace);
        free(_pixels);
        _pixels = NULL;
        return;
    }

    // Drawing the image fills the _pixels buffer.
    CGContextDrawImage(context, CGRectMake(0, 0, width, height), [image CGImage]);

    CGContextRelease(context);
    CGColorSpaceRelease(colorSpace);

    _tesseract->SetImage((const unsigned char *)_pixels, width, height,
                         (int)sizeof(uint32_t), (int)bytesPerRow);
}

@end