Updated Tesseract to 3.03 and Leptonica to 1.70.

expond · Oct 9, 2014 · 32e21dd · 32e21dd
1 parent ee6061d
commit 32e21dd
Show file tree

Hide file tree

Showing 351 changed files with 41,152 additions and 2,105 deletions.
diff --git a/Tesseract OCR iOS.xcodeproj/project.pbxproj b/Tesseract OCR iOS.xcodeproj/project.pbxproj
@@ -787,7 +787,7 @@
 		64F74CC1172FD75F0068E657 /* Project object */ = {
 			isa = PBXProject;
 			attributes = {
-				LastUpgradeCheck = 0510;
+				LastUpgradeCheck = 0600;
 				ORGANIZATIONNAME = "Daniele Galiotto - www.g8production.com";
 			};
 			buildConfigurationList = 64F74CC4172FD75F0068E657 /* Build configuration list for PBXProject "Tesseract OCR iOS" */;
@@ -904,7 +904,7 @@
 				PRODUCT_NAME = "$(TARGET_NAME)";
 				SDKROOT = iphoneos;
 				TARGETED_DEVICE_FAMILY = "1,2";
-				VALID_ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT) x86_64 i386";
+				VALID_ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT) i386 x86_64";
 				WRAPPER_EXTENSION = framework;
 			};
 			name = Debug;
@@ -937,7 +937,7 @@
 				PRODUCT_NAME = "$(TARGET_NAME)";
 				SDKROOT = iphoneos;
 				TARGETED_DEVICE_FAMILY = "1,2";
-				VALID_ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT) x86_64 i386";
+				VALID_ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT) i386 x86_64";
 				WRAPPER_EXTENSION = framework;
 			};
 			name = Release;
@@ -996,7 +996,7 @@
 				GCC_WARN_UNINITIALIZED_AUTOS = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
 				IPHONEOS_DEPLOYMENT_TARGET = 5.0;
-				ONLY_ACTIVE_ARCH = NO;
+				ONLY_ACTIVE_ARCH = YES;
 				OTHER_LDFLAGS = (
 					"-ObjC",
 					"-lstdc++",

diff --git a/TesseractOCR/include/leptonica/allheaders.h b/TesseractOCR/include/leptonica/allheaders.h
diff --git a/TesseractOCR/include/leptonica/alltypes.h b/TesseractOCR/include/leptonica/alltypes.h
@@ -10,7 +10,7 @@
  -     copyright notice, this list of conditions and the following
  -     disclaimer in the documentation and/or other materials
  -     provided with the distribution.
- - 
+ -
  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -30,31 +30,34 @@
     /* Standard */
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdarg.h>
 
     /* General and configuration defs */
 #include "environ.h"
 
-    /* Imaging */
+    /* Generic and non-image-specific containers */
 #include "array.h"
-#include "arrayaccess.h"
 #include "bbuffer.h"
+#include "heap.h"
+#include "list.h"
+#include "ptra.h"
+#include "queue.h"
+#include "stack.h"
+
+    /* Imaging */
+#include "arrayaccess.h"
 #include "bmf.h"
 #include "ccbord.h"
 #include "dewarp.h"
 #include "gplot.h"
-#include "heap.h"
 #include "imageio.h"
 #include "jbclass.h"
-#include "list.h"
 #include "morph.h"
 #include "pix.h"
-#include "ptra.h"
-#include "queue.h"
+#include "recog.h"
 #include "regutils.h"
 #include "sudoku.h"
-#include "stack.h"
 #include "watershed.h"
 
 
 #endif /* LEPTONICA_ALLTYPES_H */
-
diff --git a/TesseractOCR/include/leptonica/array.h b/TesseractOCR/include/leptonica/array.h
@@ -10,7 +10,7 @@
  -     copyright notice, this list of conditions and the following
  -     disclaimer in the documentation and/or other materials
  -     provided with the distribution.
- - 
+ -
  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -40,10 +40,12 @@
  *
  *  Contains definitions for:
  *      Numa interpolation flags
+ *      Numa and FPix border flags
+ *      Numa data type conversion to string
  */
 
 
-/*------------------------------------------------------------------------* 
+/*------------------------------------------------------------------------*
  *                             Array Structs                              *
  *------------------------------------------------------------------------*/
 
@@ -142,10 +144,9 @@ struct L_Bytea
 typedef struct L_Bytea L_BYTEA;
 
 
-/*------------------------------------------------------------------------* 
+/*------------------------------------------------------------------------*
  *                              Array flags                               *
  *------------------------------------------------------------------------*/
-
     /* Flags for interpolation in Numa */
 enum {
     L_LINEAR_INTERP = 1,        /* linear     */
@@ -159,5 +160,11 @@ enum {
     L_MIRRORED_BORDER = 3       /* mirrored                                  */
 };
 
+    /* Flags for the data type used in Numa data type conversion to string */
+enum {
+    L_INTEGER_VALUE = 1,        /* convert values as integers  */
+    L_FLOAT_VALUE = 2           /* convert values as floats    */
+};
+
 
 #endif  /* LEPTONICA_ARRAY_H */
diff --git a/TesseractOCR/include/leptonica/arrayaccess.h b/TesseractOCR/include/leptonica/arrayaccess.h
diff --git a/TesseractOCR/include/leptonica/bbuffer.h b/TesseractOCR/include/leptonica/bbuffer.h
@@ -10,7 +10,7 @@
  -     copyright notice, this list of conditions and the following
  -     disclaimer in the documentation and/or other materials
  -     provided with the distribution.
- - 
+ -
  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

diff --git a/TesseractOCR/include/leptonica/bilateral.h b/TesseractOCR/include/leptonica/bilateral.h
@@ -0,0 +1,130 @@
+/*====================================================================*
+ -  Copyright (C) 2001 Leptonica.  All rights reserved.
+ -
+ -  Redistribution and use in source and binary forms, with or without
+ -  modification, are permitted provided that the following conditions
+ -  are met:
+ -  1. Redistributions of source code must retain the above copyright
+ -     notice, this list of conditions and the following disclaimer.
+ -  2. Redistributions in binary form must reproduce the above
+ -     copyright notice, this list of conditions and the following
+ -     disclaimer in the documentation and/or other materials
+ -     provided with the distribution.
+ -
+ -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
+ -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *====================================================================*/
+
+#ifndef  LEPTONICA_BILATERAL_H
+#define  LEPTONICA_BILATERAL_H
+
+/*
+ *  Contains the following struct
+ *      struct L_Bilateral
+ *
+ *
+ *  For a tutorial introduction to bilateral filters, which apply a
+ *  gaussian blur to smooth parts of the image while preserving edges, see
+ *    http://people.csail.mit.edu/sparis/bf_course/slides/03_definition_bf.pdf
+ *
+ *  We give an implementation of a bilateral filtering algorithm given in:
+ *    "Real-Time O(1) Bilateral Filtering," by Yang, Tan and Ahuja, CVPR 2009
+ *  which is at:
+ *    http://vision.ai.uiuc.edu/~qyang6/publications/cvpr-09-qingxiong-yang.pdf
+ *  This is based on an earlier algorithm by Sylvain Paris and Frédo Durand:
+ *    http://people.csail.mit.edu/sparis/publi/2006/eccv/
+ *               Paris_06_Fast_Approximation.pdf
+ *
+ *  The kernel of the filter is a product of a spatial gaussian and a
+ *  monotonically decreasing function of the difference in intensity
+ *  between the source pixel and the neighboring pixel.  The intensity
+ *  part of the filter gives higher influence for pixels with intensities
+ *  that are near to the source pixel, and the spatial part of the
+ *  filter gives higher weight to pixels that are near the source pixel.
+ *  This combination smooths in relatively uniform regions, while
+ *  maintaining edges.
+ *
+ *  The advantage of the approach of Yang et al. is that it is separable,
+ *  so the computation time is linear in the gaussian filter size.
+ *  Furthermore, it is possible to do much of the computation at a reduced
+ *  scale, which gives a good approximation to the full resolution version
+ *  but greatly speeds it up.
+ *
+ *  The bilateral filtered value at x is:
+ *
+ *            sum[y in N(x)]: spatial(|y - x|) * range(|I(x) - I(y)|) * I(y)
+ *    I'(x) = --------------------------------------------------------------
+ *            sum[y in N(x)]: spatial(|y - x|) * range(|I(x) - I(y)|)
+ *
+ *  where I() is the input image, I'() is the filtered image, N(x) is the
+ *  set of pixels around x in the filter support, and spatial() and range()
+ *  are gaussian functions:
+ *          spatial(x) = exp(-x^2 / (2 * s_s^2))
+ *          range(x) = exp(-x^2 / (2 * s_r^2))
+ *  where s_s and s_r are the standard deviations of the two gaussians.
+ *
+ *  Yang et al use a separable approximation to this, by defining a set
+ *  of related but separable functions J(k,x), that we call Principal
+ *  Bilateral Components (PBC):
+ *
+ *             sum[y in N(x)]: spatial(|y - x|) * range(|k - I(y)|) * I(y)
+ *    J(k,x) = -----------------------------------------------------------
+ *             sum[y in N(x)]: spatial(|y - x|) * range(|k - I(y)|)
+ *
+ *  which are computed quickly for a set of n values k[p], p = 0 ... n-1.
+ *  Then each output pixel is found using a linear interpolation:
+ *
+ *    I'(x) = (1 - q) * J(k[p],x) + q * J(k[p+1],x)
+ *
+ *  where J(k[p],x) and J(k[p+1],x) are PBC for which
+ *    k[p] <= I(x) and k[p+1] >= I(x), and
+ *    q = (I(x) - k[p]) / (k[p+1] - k[p]).
+ *
+ *  We can also subsample I(x), create subsampled versions of J(k,x),
+ *  which are then interpolated between for I'(x).
+ *
+ *  We generate 'pixsc', by optionally downscaling the input image
+ *  (using area mapping by the factor 'reduction'), and then adding
+ *  a mirrored border to avoid boundary cases.  This is then used
+ *  to compute 'ncomps' PBCs.
+ *
+ *  The 'spatial_stdev' is also downscaled by 'reduction'.  The size
+ *  of the 'spatial' array is 4 * (reduced 'spatial_stdev') + 1.
+ *  The size of the 'range' array is 256.
+ */
+
+
+/*------------------------------------------------------------------------*
+ *                          Bilateral filter                              *
+ *------------------------------------------------------------------------*/
+struct L_Bilateral  /* state for the separable (PBC-based) bilateral filter */
+{
+    struct Pix      *pixs;           /* clone of source pix                  */
+    struct Pix      *pixsc;          /* downscaled pix with mirrored border  */
+    l_int32          reduction;      /* downscaling factor for intermediates: 1, 2 or 4x */
+    l_float32        spatial_stdev;  /* stdev of spatial gaussian            */
+    l_float32        range_stdev;    /* stdev of range gaussian              */
+    l_float32       *spatial;        /* 1D gaussian spatial kernel; size is 4 * (reduced spatial_stdev) + 1 */
+    l_float32       *range;          /* one-sided gaussian range kernel; size is 256 */
+    l_int32          minval;         /* min value in 8 bpp pix               */
+    l_int32          maxval;         /* max value in 8 bpp pix               */
+    l_int32          ncomps;         /* number of intermediate results (PBCs) */
+    l_int32         *nc;             /* set of k values (size ncomps)        */
+    l_int32         *kindex;         /* mapping from intensity to lower k    */
+    l_float32       *kfract;         /* mapping from intensity to fract k    */
+    struct Pixa     *pixac;          /* intermediate result images (PBC)     */
+    l_uint32      ***lineset;        /* lineptrs for pixac                   */
+};
+typedef struct L_Bilateral  L_BILATERAL;
+
+
+#endif  /* LEPTONICA_BILATERAL_H */
diff --git a/TesseractOCR/include/leptonica/bmf.h b/TesseractOCR/include/leptonica/bmf.h
@@ -10,7 +10,7 @@
  -     copyright notice, this list of conditions and the following
  -     disclaimer in the documentation and/or other materials
  -     provided with the distribution.
- - 
+ -
  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -27,7 +27,7 @@
 #ifndef  LEPTONICA_BMF_H
 #define  LEPTONICA_BMF_H
 
-/* 
+/*
  *  bmf.h
  *
  *     Simple data structure to hold bitmap fonts and related data

diff --git a/TesseractOCR/include/leptonica/bmp.h b/TesseractOCR/include/leptonica/bmp.h
@@ -10,7 +10,7 @@
  -     copyright notice, this list of conditions and the following
  -     disclaimer in the documentation and/or other materials
  -     provided with the distribution.
- - 
+ -
  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

diff --git a/TesseractOCR/include/leptonica/ccbord.h b/TesseractOCR/include/leptonica/ccbord.h
@@ -10,7 +10,7 @@
  -     copyright notice, this list of conditions and the following
  -     disclaimer in the documentation and/or other materials
  -     provided with the distribution.
- - 
+ -
  -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR