Skip to content

Commit

Permalink
Merge pull request gali8#125 from ws233/master
Browse files Browse the repository at this point in the history
Added hOCR support, switched to static framework.
  • Loading branch information
Kevin Conley committed Jan 10, 2015
2 parents 52761d5 + e69afab commit 551238a
Show file tree
Hide file tree
Showing 19 changed files with 580 additions and 84 deletions.
2 changes: 1 addition & 1 deletion Podfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
source 'https://github.com/CocoaPods/Specs.git'
workspace 'Tesseract-OCR-iOS'
xcodeproj 'TestsProject/TestsProject.xcodeproj/'
xcodeproj 'TestsProject/TestsProject.xcodeproj/', 'Coverage' => :debug

target 'TestsProjectTests' do

Expand Down
2 changes: 1 addition & 1 deletion Podfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ DEPENDENCIES:
SPEC CHECKSUMS:
Kiwi: 73e1400209055ee9c8ba78c6012b6b642d0fb9f7

COCOAPODS: 0.35.0
COCOAPODS: 0.36.0.beta.1
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,9 @@
64DDA74E188FD7D10025590D /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 6400DDEA180C5DE000443362 /* Main.storyboard */; };
64DDA75B188FD9140025590D /* tessdata in Resources */ = {isa = PBXBuildFile; fileRef = 64DDA75A188FD9140025590D /* tessdata */; };
64E40AB1180C6D4D00C36DDE /* libstdc++.6.0.9.dylib in Frameworks */ = {isa = PBXBuildFile; fileRef = 64E40AB0180C6D4D00C36DDE /* libstdc++.6.0.9.dylib */; };
73BE4C121A598F86002C15F1 /* TesseractOCR.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 73C0A7BE1A59565100D823D4 /* TesseractOCR.framework */; };
73BE4C131A598F86002C15F1 /* TesseractOCR.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 73C0A7BE1A59565100D823D4 /* TesseractOCR.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
73BE4C421A5D83AB002C15F1 /* TesseractOCR.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 73BE4C411A5D83AB002C15F1 /* TesseractOCR.framework */; };
/* End PBXBuildFile section */

/* Begin PBXCopyFilesBuildPhase section */
73BE4C141A598F86002C15F1 /* Embed Frameworks */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = "";
dstSubfolderSpec = 10;
files = (
73BE4C131A598F86002C15F1 /* TesseractOCR.framework in Embed Frameworks */,
);
name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */

/* Begin PBXFileReference section */
428615B61845F340005D5A2E /* image_sample.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = image_sample.jpg; sourceTree = "<group>"; };
6400DDD5180C5DE000443362 /* Template Framework Project.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "Template Framework Project.app"; sourceTree = BUILT_PRODUCTS_DIR; };
Expand All @@ -57,6 +42,7 @@
6490748F198A5CD500D728CC /* CoreImage.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreImage.framework; path = System/Library/Frameworks/CoreImage.framework; sourceTree = SDKROOT; };
64DDA75A188FD9140025590D /* tessdata */ = {isa = PBXFileReference; lastKnownFileType = folder; path = tessdata; sourceTree = "<group>"; };
64E40AB0180C6D4D00C36DDE /* libstdc++.6.0.9.dylib */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.dylib"; name = "libstdc++.6.0.9.dylib"; path = "usr/lib/libstdc++.6.0.9.dylib"; sourceTree = SDKROOT; };
73BE4C411A5D83AB002C15F1 /* TesseractOCR.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = TesseractOCR.framework; path = "../build/Debug-iphoneos/TesseractOCR.framework"; sourceTree = "<group>"; };
73C0A7BE1A59565100D823D4 /* TesseractOCR.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; path = TesseractOCR.framework; sourceTree = BUILT_PRODUCTS_DIR; };
/* End PBXFileReference section */

Expand All @@ -65,11 +51,11 @@
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
73BE4C421A5D83AB002C15F1 /* TesseractOCR.framework in Frameworks */,
64907490198A5CD500D728CC /* CoreImage.framework in Frameworks */,
64E40AB1180C6D4D00C36DDE /* libstdc++.6.0.9.dylib in Frameworks */,
6400DDDB180C5DE000443362 /* CoreGraphics.framework in Frameworks */,
6400DDDD180C5DE000443362 /* UIKit.framework in Frameworks */,
73BE4C121A598F86002C15F1 /* TesseractOCR.framework in Frameworks */,
6400DDD9180C5DE000443362 /* Foundation.framework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
Expand Down Expand Up @@ -97,6 +83,7 @@
6400DDD7180C5DE000443362 /* Frameworks */ = {
isa = PBXGroup;
children = (
73BE4C411A5D83AB002C15F1 /* TesseractOCR.framework */,
73C0A7BE1A59565100D823D4 /* TesseractOCR.framework */,
6490748F198A5CD500D728CC /* CoreImage.framework */,
64E40AB0180C6D4D00C36DDE /* libstdc++.6.0.9.dylib */,
Expand Down Expand Up @@ -144,7 +131,6 @@
6400DDD1180C5DE000443362 /* Sources */,
6400DDD2180C5DE000443362 /* Frameworks */,
6400DDD3180C5DE000443362 /* Resources */,
73BE4C141A598F86002C15F1 /* Embed Frameworks */,
);
buildRules = (
);
Expand Down Expand Up @@ -316,12 +302,17 @@
"$(inherited)",
"$(PROJECT_TEMP_DIR)/$(CONFIGURATION)$(EFFECTIVE_PLATFORM_NAME)",
);
GCC_INSTRUMENT_PROGRAM_FLOW_ARCS = NO;
GCC_PRECOMPILE_PREFIX_HEADER = YES;
GCC_PREFIX_HEADER = "Template Framework Project/Template Framework Project-Prefix.pch";
INFOPLIST_FILE = "Template Framework Project/Template Framework Project-Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 7.0;
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
ONLY_ACTIVE_ARCH = YES;
OTHER_LDFLAGS = (
"-lstdc++",
"-ObjC",
);
PRODUCT_NAME = "$(TARGET_NAME)";
WRAPPER_EXTENSION = app;
};
Expand All @@ -336,12 +327,17 @@
"$(inherited)",
"$(PROJECT_TEMP_DIR)/$(CONFIGURATION)$(EFFECTIVE_PLATFORM_NAME)",
);
GCC_INSTRUMENT_PROGRAM_FLOW_ARCS = NO;
GCC_PRECOMPILE_PREFIX_HEADER = YES;
GCC_PREFIX_HEADER = "Template Framework Project/Template Framework Project-Prefix.pch";
INFOPLIST_FILE = "Template Framework Project/Template Framework Project-Info.plist";
IPHONEOS_DEPLOYMENT_TARGET = 7.0;
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
ONLY_ACTIVE_ARCH = NO;
OTHER_LDFLAGS = (
"-lstdc++",
"-ObjC",
);
PRODUCT_NAME = "$(TARGET_NAME)";
WRAPPER_EXTENSION = app;
};
Expand Down
89 changes: 87 additions & 2 deletions Tesseract OCR iOS.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -581,8 +581,8 @@
COPY_PHASE_STRIP = NO;
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_DYNAMIC_NO_PIC = NO;
GCC_GENERATE_TEST_COVERAGE_FILES = YES;
GCC_INSTRUMENT_PROGRAM_FLOW_ARCS = YES;
GCC_GENERATE_TEST_COVERAGE_FILES = NO;
GCC_INSTRUMENT_PROGRAM_FLOW_ARCS = NO;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
Expand Down Expand Up @@ -664,6 +664,7 @@
"$(inherited)",
"$(PROJECT_DIR)/TesseractOCR/lib",
);
MACH_O_TYPE = staticlib;
MTL_ENABLE_DEBUG_INFO = YES;
PRODUCT_NAME = "$(TARGET_NAME)";
SKIP_INSTALL = YES;
Expand Down Expand Up @@ -704,6 +705,7 @@
"$(inherited)",
"$(PROJECT_DIR)/TesseractOCR/lib",
);
MACH_O_TYPE = staticlib;
MTL_ENABLE_DEBUG_INFO = NO;
PRODUCT_NAME = "$(TARGET_NAME)";
SKIP_INSTALL = YES;
Expand All @@ -714,13 +716,95 @@
};
name = Release;
};
F4EE1C791A612ADC007BB075 /* Coverage */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
CLANG_CXX_LIBRARY = "compiler-default";
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_DYNAMIC_NO_PIC = NO;
GCC_GENERATE_TEST_COVERAGE_FILES = YES;
GCC_INSTRUMENT_PROGRAM_FLOW_ARCS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_SYMBOLS_PRIVATE_EXTERN = NO;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
INFOPLIST_FILE = "";
IPHONEOS_DEPLOYMENT_TARGET = 5.0;
ONLY_ACTIVE_ARCH = YES;
OTHER_LDFLAGS = (
"-ObjC",
"-lstdc++",
);
SDKROOT = iphoneos;
};
name = Coverage;
};
F4EE1C7A1A612ADC007BB075 /* Coverage */ = {
isa = XCBuildConfiguration;
buildSettings = {
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_UNREACHABLE_CODE = YES;
"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
CURRENT_PROJECT_VERSION = 1;
DEFINES_MODULE = YES;
DYLIB_COMPATIBILITY_VERSION = 1;
DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath";
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
INFOPLIST_FILE = "TesseractOCR/TesseractOCR-Info.plist";
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
IPHONEOS_DEPLOYMENT_TARGET = 7.0;
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks";
LIBRARY_SEARCH_PATHS = (
"$(inherited)",
"$(PROJECT_DIR)/TesseractOCR/lib",
);
MACH_O_TYPE = staticlib;
MTL_ENABLE_DEBUG_INFO = YES;
PRODUCT_NAME = "$(TARGET_NAME)";
SKIP_INSTALL = YES;
TARGETED_DEVICE_FAMILY = "1,2";
VALID_ARCHS = "arm64 armv7 armv7s i386 x86_64";
VERSIONING_SYSTEM = "apple-generic";
VERSION_INFO_PREFIX = "";
};
name = Coverage;
};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
64F74CC4172FD75F0068E657 /* Build configuration list for PBXProject "Tesseract OCR iOS" */ = {
isa = XCConfigurationList;
buildConfigurations = (
64F74CD5172FD75F0068E657 /* Debug */,
F4EE1C791A612ADC007BB075 /* Coverage */,
64F74CD6172FD75F0068E657 /* Release */,
);
defaultConfigurationIsVisible = 0;
Expand All @@ -730,6 +814,7 @@
isa = XCConfigurationList;
buildConfigurations = (
73C0A78F1A592C2B00D823D4 /* Debug */,
F4EE1C7A1A612ADC007BB075 /* Coverage */,
73C0A7901A592C2B00D823D4 /* Release */,
);
defaultConfigurationIsVisible = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES"
buildConfiguration = "Debug">
buildConfiguration = "Coverage">
<Testables>
<TestableReference
skipped = "NO">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,6 @@
ReferencedContainer = "container:TestsProject/TestsProject.xcodeproj">
</BuildableReference>
</BuildActionEntry>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "64A0293917307C7E002B12E7"
BuildableName = "TesseractOCRAggregate"
BlueprintName = "TesseractOCRAggregate"
ReferencedContainer = "container:Tesseract OCR iOS.xcodeproj">
</BuildableReference>
</BuildActionEntry>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
Expand All @@ -54,7 +40,7 @@
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES"
buildConfiguration = "Debug">
buildConfiguration = "Coverage">
<Testables>
<TestableReference
skipped = "NO">
Expand Down Expand Up @@ -82,7 +68,7 @@
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
buildConfiguration = "Debug"
buildConfiguration = "Coverage"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
allowLocationSimulation = "YES">
Expand Down
62 changes: 43 additions & 19 deletions TesseractOCR/G8Tesseract.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@
@property (nonatomic, copy) NSString* language;

/**
* The path to the tessdata file, if it was specified in a call to initWithLanguage:configDictionary:configFileNames:cachesRelatedDataPath:engineMode: as a cachesRelatedDataPath
* Otherwise it's supposed that the tessdata folder is located in the application bundle
* The absolute path to the tessdata folder, which may exist in either the
* application bundle or in the Caches directory depending on the argument to
* `cachesRelatedDataPath` in the designated initializer.
*/
@property (nonatomic, readonly, copy) NSString *absoluteDataPath;

Expand Down Expand Up @@ -112,6 +113,19 @@
*/
@property (nonatomic, readonly) NSString *recognizedText;

/**
* Make an HTML-formatted string with hOCR markup from the internal Tesseract
* data structures.
* `pageNumber` is 0-based but will appear in the output as 1-based.
*
* @param pageNumber The page number within the image of interest. If you
* aren't using a multipage image or don't know what this
* means, use `0` for `pageNumber`.
*
* @return The HTML-formatted string with hOCR markup.
*/
- (NSString *)recognizedHOCRForPageNumber:(int)pageNumber;

/**
* The result of Tesseract's orientation analysis of the target image. See
* `G8Orientation` in G8Constants.h for the possible orientations.
Expand Down Expand Up @@ -168,7 +182,7 @@

/**
* Retrieve Tesseract's recognition result based on a provided resolution.
* For, example for the pageIteratorLevel == G8PageIteratorLevelSymbol it returns
* For example, when pageIteratorLevel == G8PageIteratorLevelSymbol, it returns
* an array of `G8RecognizedBlock`'s representing the characters recognized
* in the target image, including the bounding boxes for each character.
*
Expand All @@ -177,8 +191,9 @@
* resolution options.
*
* @return An array of `G8RecognizedBlock`'s, each containing a confidence
* value and a bounding box for the text it represents. See G8RecognizedBlock.h for more
* information about the available fields for this data structure.
* value and a bounding box for the text it represents. See
* G8RecognizedBlock.h for more information about the available fields
* for this data structure.
*/
- (NSArray *)recognizedBlocksByIteratorLevel:(G8PageIteratorLevel)pageIteratorLevel;

Expand Down Expand Up @@ -231,17 +246,25 @@
/**
* Initialize Tesseract with the provided language and engine mode.
*
* @param language The language to use in recognition. See `language`.
* @param configDictionary A dictionary of the config variables
* @param configFileNames An array of file names containing key-value config pairs. All the config
* variables can be init only and debug time both. Furthermore they could be
* specified at the same time, in such case tesseract will get variables from
* every file and dictionary all together.
* The files are searched into two folders, which are tessdata/tessconfigs and tessdata/configs
* @param cachesRelatedPath If the cachesRelatedDataPath is specified, the whole content of the tessdata from the
* application bundle is copied to the Library/Caches/cachesRelatedDataPath/tessdata
* and tesseract is initialized with that path.
* @param engineMode The engine mode to use in recognition. See `engineMode`.
* @param language The language to use in recognition. See
* `language`.
* @param configDictionary A dictionary of config variables to set.
* @param configFileNames An array of file names containing key-value
* config pairs. Config settings can be set at
* initialization or run-time. Furthermore, they
* could be specified at the same time, in which
* case Tesseract will get variables from every
* config file as well as the dictionary.
* The config files must exist in one of two
* possible folders: tessdata/tessconfigs or
* tessdata/configs.
* @param cachesRelatedDataPath If the cachesRelatedDataPath is specified, the
* whole contents of the tessdata folder in the
* application bundle will be copied to
* Library/Caches/cachesRelatedDataPath/tessdata
* and Tesseract will be set to use that path.
* @param engineMode The engine mode to use in recognition. See
* `engineMode`.
*
* @return The initialized Tesseract object, or `nil` if there was an error.
*/
Expand All @@ -262,12 +285,13 @@
- (void)setVariableValue:(NSString *)value forKey:(NSString *)key;

/**
* Returns a Tesseract variable for the given key. See G8TesseractParameters.h for the available
* options.
* Returns a Tesseract variable for the given key. See G8TesseractParameters.h
* for the available options.
*
* @param key The option to get.
*
* @return returns the variable value for the given key, if it's beeb set. nil otherwise.
* @return The variable value for the given key if it has been set, or
* `nil` otherwise.
*/
- (NSString*)variableValueForKey:(NSString *)key;

Expand Down
Loading

0 comments on commit 551238a

Please sign in to comment.